# The Layer* classes and the helpers addParametersToDict /
# getInitialParametersFromDict are assumed to be imported from the
# surrounding codebase; their module paths are not shown in this excerpt.


def getMnistPIModel(rng, srng):
    '''
    Constructs a real-valued fully-connected model for permutation-invariant
    (PI) MNIST.
    @param rng: Numpy rng object
    @param srng: rng object used by theano
    @return layer: The model
    @return p_dict: Dictionary containing the shared variables with the model
        parameters. This helps to easily store parameters to a file
        during/after training.
    '''
    weight_decay = 1e-4
    p_dict = {}

    # Layer 1: FC1200
    layer = LayerInput((784,))
    layer = LayerDropout(layer, 0.2, srng)
    layer = LayerFC(layer, 1200, rng, srng,
                    weight_type='real',
                    regularizer='l2',
                    regularizer_weight=weight_decay,
                    initial_parameters=None)
    p_dict = addParametersToDict(layer, 0, p_dict)
    layer = LayerBatchnorm(layer, alpha=0.1, initial_parameters=None)
    p_dict = addParametersToDict(layer, 0, p_dict)
    layer = LayerActivationTanh(layer)

    # Layer 2: FC1200
    layer = LayerDropout(layer, 0.4, srng)
    layer = LayerFC(layer, 1200, rng, srng,
                    weight_type='real',
                    regularizer='l2',
                    regularizer_weight=weight_decay,
                    initial_parameters=None)
    p_dict = addParametersToDict(layer, 1, p_dict)
    layer = LayerBatchnorm(layer, alpha=0.1, initial_parameters=None)
    p_dict = addParametersToDict(layer, 1, p_dict)
    layer = LayerActivationTanh(layer)

    # Layer 3: FC10
    layer = LayerDropout(layer, 0.4, srng)
    layer = LayerFC(layer, 10, rng, srng,
                    weight_type='real',
                    regularizer='l2',
                    regularizer_weight=weight_decay,
                    enable_bias=True,
                    bias_type='real',
                    initial_parameters=None)
    p_dict = addParametersToDict(layer, 2, p_dict)
    layer = LayerOutput(layer, 10, objective='crossentropy')
    return layer, p_dict

def getMnistModel(rng, srng):
    '''
    Constructs a real-valued CNN model.
    @param rng: Numpy rng object
    @param srng: rng object used by theano
    @return layer: The CNN model
    @return p_dict: Dictionary containing the shared variables with the model
        parameters. This helps to easily store parameters to a file
        during/after training.
    '''
    weight_decay = 1e-6
    p_dict = {}

    # Layer 1: 32C5-P2
    layer = LayerInput((1, 28, 28))
    layer = LayerConv(layer, 32, (5, 5), (1, 1), 'half', rng, srng,
                      weight_type='real',
                      regularizer='l2',
                      regularizer_weight=weight_decay,
                      initial_parameters=None)
    p_dict = addParametersToDict(layer, 0, p_dict)
    layer = LayerBatchnorm(layer, alpha=0.1, initial_parameters=None)
    p_dict = addParametersToDict(layer, 0, p_dict)
    layer = LayerActivationTanh(layer)
    layer = LayerPooling(layer, (2, 2), mode='max')

    # Layer 2: 64C5-P2
    layer = LayerDropout(layer, 0.2, srng)
    layer = LayerConv(layer, 64, (5, 5), (1, 1), 'half', rng, srng,
                      weight_type='real',
                      regularizer='l2',
                      regularizer_weight=weight_decay,
                      initial_parameters=None)
    p_dict = addParametersToDict(layer, 1, p_dict)
    layer = LayerBatchnorm(layer, alpha=0.1, initial_parameters=None)
    p_dict = addParametersToDict(layer, 1, p_dict)
    layer = LayerActivationTanh(layer)
    layer = LayerPooling(layer, (2, 2), mode='max')
    layer = LayerFlatten(layer)

    # Layer 3: FC512
    layer = LayerDropout(layer, 0.3, srng)
    layer = LayerFC(layer, 512, rng, srng,
                    weight_type='real',
                    regularizer='l2',
                    regularizer_weight=weight_decay,
                    initial_parameters=None)
    p_dict = addParametersToDict(layer, 2, p_dict)
    layer = LayerBatchnorm(layer, alpha=0.1, initial_parameters=None)
    p_dict = addParametersToDict(layer, 2, p_dict)
    layer = LayerActivationTanh(layer)

    # Layer 4: FC10
    layer = LayerFC(layer, 10, rng, srng,
                    weight_type='real',
                    regularizer='l2',
                    regularizer_weight=weight_decay,
                    enable_bias=True,
                    bias_type='real',
                    initial_parameters=None)
    p_dict = addParametersToDict(layer, 3, p_dict)
    layer = LayerOutput(layer, 10, objective='crossentropy')
    return layer, p_dict
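
# Illustrative usage sketch: build one of the real-valued models above and
# store its parameters so they can later seed the ternary models below via
# the 'probability' weight initialization. The helper name and the .npz file
# name are hypothetical placeholders; MRG_RandomStreams is theano's sampling
# rng and np.random.RandomState is the numpy rng expected by the builders.
def _exampleTrainRealModel():
    import numpy as np
    from theano.sandbox.rng_mrg import MRG_RandomStreams

    rng = np.random.RandomState(1234)
    srng = MRG_RandomStreams(seed=5678)
    layer, p_dict = getMnistModel(rng, srng)

    # ... training of the real-valued model would happen here ...

    # p_dict maps parameter names to theano shared variables; extract the
    # numpy values so they can be written to disk and reloaded later.
    np.savez('mnist_real_params.npz',
             **{name: shared.get_value() for name, shared in p_dict.items()})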

def getMnistPIModel(init_p_dict, rng, srng):
    '''
    Constructs a fully-connected model with ternary weights and sign
    activation function for permutation-invariant (PI) MNIST.
    @param init_p_dict: Dict containing the initial parameters.
    @param rng: Numpy rng object
    @param srng: rng object used by theano
    @return layer: The model
    @return p_dict: Dictionary containing the shared variables with the model
        parameters. This helps to easily store parameters to a file
        during/after training.
    '''
    regularization_weight = 1e-10
    weight_type = 'ternary'
    p_dict = {}

    # Layer 1: FC1200
    layer = LayerInput((784,))
    layer = LayerDropout(layer, 0.1, srng)
    layer = LayerFC(layer, 1200, rng, srng,
                    weight_type=weight_type,
                    weight_parameterization='logits',
                    weight_initialization_method='probability',
                    regularizer='shayer',
                    regularizer_weight=regularization_weight,
                    logit_bounds=(-5., 5.),
                    initial_parameters=getInitialParametersFromDict(
                        init_p_dict, 0, 'linearforward'))
    p_dict = addParametersToDict(layer, 0, p_dict)
    layer = LayerBatchnorm(layer, alpha=0.1,
                           average_statistics_over_predictions=True,
                           initial_parameters=getInitialParametersFromDict(
                               init_p_dict, 0, 'batchnorm'))
    p_dict = addParametersToDict(layer, 0, p_dict)
    layer = LayerActivationSign(layer)
    layer = LayerLocalReparameterization(layer, srng)

    # Layer 2: FC1200
    layer = LayerDropout(layer, 0.2, srng)
    layer = LayerFC(layer, 1200, rng, srng,
                    weight_type=weight_type,
                    weight_parameterization='logits',
                    weight_initialization_method='probability',
                    regularizer='shayer',
                    regularizer_weight=regularization_weight,
                    logit_bounds=(-5., 5.),
                    initial_parameters=getInitialParametersFromDict(
                        init_p_dict, 1, 'linearforward'))
    p_dict = addParametersToDict(layer, 1, p_dict)
    layer = LayerBatchnorm(layer, alpha=0.1,
                           average_statistics_over_predictions=True,
                           initial_parameters=getInitialParametersFromDict(
                               init_p_dict, 1, 'batchnorm'))
    p_dict = addParametersToDict(layer, 1, p_dict)
    layer = LayerActivationSign(layer)
    layer = LayerLocalReparameterization(layer, srng)

    # Layer 3: FC10
    layer = LayerDropout(layer, 0.3, srng)
    layer = LayerFC(layer, 10, rng, srng,
                    weight_type=weight_type,
                    weight_parameterization='logits',
                    weight_initialization_method='probability',
                    regularizer='shayer',
                    regularizer_weight=regularization_weight,
                    enable_bias=True,
                    bias_type='real',
                    enable_activation_normalization=True,
                    logit_bounds=(-5., 5.),
                    initial_parameters=getInitialParametersFromDict(
                        init_p_dict, 2, 'linearforward'))
    p_dict = addParametersToDict(layer, 2, p_dict)
    layer = LayerLocalReparameterization(layer, srng)
    layer = LayerOutput(layer, 10, objective='crossentropy')
    return layer, p_dict
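
# Illustrative sketch of the pretraining hand-off: the ternary PI model is
# initialized from stored real-valued parameters, which init_p_dict supplies
# as plain numpy arrays. The helper name and the .npz file name are
# hypothetical placeholders.
def _exampleBuildTernaryPIModel():
    import numpy as np
    from theano.sandbox.rng_mrg import MRG_RandomStreams

    # Load the pretrained real-valued parameters saved earlier.
    stored = np.load('mnist_pi_real_params.npz')
    init_p_dict = {name: stored[name] for name in stored.files}

    rng = np.random.RandomState(1234)
    srng = MRG_RandomStreams(seed=5678)
    return getMnistPIModel(init_p_dict, rng, srng)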

def getSvhnModel(init_p_dict, rng, srng):
    '''
    Constructs a CNN model with ternary weights and sign activation function.
    @param init_p_dict: Dict containing the initial parameters.
    @param rng: Numpy rng object
    @param srng: rng object used by theano
    @return layer: The CNN model
    @return p_dict: Dictionary containing the shared variables with the model
        parameters. This helps to easily store parameters to a file
        during/after training.
    '''
    regularization_weight = 1e-10
    weight_type = 'ternary'
    p_dict = {}

    # Layer 1: 64C3
    layer = LayerInput((3, 32, 32))
    layer = LayerConv(layer, 64, (3, 3), (1, 1), 'half', rng, srng,
                      weight_type=weight_type,
                      weight_parameterization='logits',
                      weight_initialization_method='probability',
                      regularizer='shayer',
                      regularizer_weight=regularization_weight,
                      logit_bounds=(-5., 5.),
                      initial_parameters=getInitialParametersFromDict(
                          init_p_dict, 0, 'linearforward'))
    p_dict = addParametersToDict(layer, 0, p_dict)
    layer = LayerBatchnorm(layer, alpha=0.1,
                           average_statistics_over_predictions=True,
                           initial_parameters=getInitialParametersFromDict(
                               init_p_dict, 0, 'batchnorm'))
    p_dict = addParametersToDict(layer, 0, p_dict)
    layer = LayerActivationSign(layer)
    layer = LayerLocalReparameterization(layer, srng)

    # Layer 2: 64C3-P2
    layer = LayerDropout(layer, 0.2, srng)
    layer = LayerConv(layer, 64, (3, 3), (1, 1), 'half', rng, srng,
                      weight_type=weight_type,
                      weight_parameterization='logits',
                      weight_initialization_method='probability',
                      regularizer='shayer',
                      regularizer_weight=regularization_weight,
                      logit_bounds=(-5., 5.),
                      initial_parameters=getInitialParametersFromDict(
                          init_p_dict, 1, 'linearforward'))
    p_dict = addParametersToDict(layer, 1, p_dict)
    layer = LayerPooling(layer, (2, 2), mode='max')
    layer = LayerBatchnorm(layer, alpha=0.1,
                           average_statistics_over_predictions=True,
                           initial_parameters=getInitialParametersFromDict(
                               init_p_dict, 1, 'batchnorm'))
    p_dict = addParametersToDict(layer, 1, p_dict)
    layer = LayerActivationSign(layer)
    layer = LayerLocalReparameterization(layer, srng)

    # Layer 3: 128C3
    layer = LayerDropout(layer, 0.2, srng)
    layer = LayerConv(layer, 128, (3, 3), (1, 1), 'half', rng, srng,
                      weight_type=weight_type,
                      weight_parameterization='logits',
                      weight_initialization_method='probability',
                      regularizer='shayer',
                      regularizer_weight=regularization_weight,
                      logit_bounds=(-5., 5.),
                      initial_parameters=getInitialParametersFromDict(
                          init_p_dict, 2, 'linearforward'))
    p_dict = addParametersToDict(layer, 2, p_dict)
    layer = LayerBatchnorm(layer, alpha=0.1,
                           average_statistics_over_predictions=True,
                           initial_parameters=getInitialParametersFromDict(
                               init_p_dict, 2, 'batchnorm'))
    p_dict = addParametersToDict(layer, 2, p_dict)
    layer = LayerActivationSign(layer)
    layer = LayerLocalReparameterization(layer, srng)

    # Layer 4: 128C3-P2
    layer = LayerDropout(layer, 0.3, srng)
    layer = LayerConv(layer, 128, (3, 3), (1, 1), 'half', rng, srng,
                      weight_type=weight_type,
                      weight_parameterization='logits',
                      weight_initialization_method='probability',
                      regularizer='shayer',
                      regularizer_weight=regularization_weight,
                      logit_bounds=(-5., 5.),
                      initial_parameters=getInitialParametersFromDict(
                          init_p_dict, 3, 'linearforward'))
    p_dict = addParametersToDict(layer, 3, p_dict)
    layer = LayerPooling(layer, (2, 2), mode='max')
    layer = LayerBatchnorm(layer, alpha=0.1,
                           average_statistics_over_predictions=True,
                           initial_parameters=getInitialParametersFromDict(
                               init_p_dict, 3, 'batchnorm'))
    p_dict = addParametersToDict(layer, 3, p_dict)
    layer = LayerActivationSign(layer)
    layer = LayerLocalReparameterization(layer, srng)

    # Layer 5: 256C3
    layer = LayerDropout(layer, 0.3, srng)
    layer = LayerConv(layer, 256, (3, 3), (1, 1), 'half', rng, srng,
                      weight_type=weight_type,
                      weight_parameterization='logits',
                      weight_initialization_method='probability',
                      regularizer='shayer',
                      regularizer_weight=regularization_weight,
                      logit_bounds=(-5., 5.),
                      initial_parameters=getInitialParametersFromDict(
                          init_p_dict, 4, 'linearforward'))
    p_dict = addParametersToDict(layer, 4, p_dict)
    layer = LayerBatchnorm(layer, alpha=0.1,
                           average_statistics_over_predictions=True,
                           initial_parameters=getInitialParametersFromDict(
                               init_p_dict, 4, 'batchnorm'))
    p_dict = addParametersToDict(layer, 4, p_dict)
    layer = LayerActivationSign(layer)
    layer = LayerLocalReparameterization(layer, srng)

    # Layer 6: 256C3-P2
    layer = LayerDropout(layer, 0.3, srng)
    layer = LayerConv(layer, 256, (3, 3), (1, 1), 'half', rng, srng,
                      weight_type=weight_type,
                      weight_parameterization='logits',
                      weight_initialization_method='probability',
                      regularizer='shayer',
                      regularizer_weight=regularization_weight,
                      logit_bounds=(-5., 5.),
                      initial_parameters=getInitialParametersFromDict(
                          init_p_dict, 5, 'linearforward'))
    p_dict = addParametersToDict(layer, 5, p_dict)
    layer = LayerPooling(layer, (2, 2), mode='max')
    layer = LayerBatchnorm(layer, alpha=0.1,
                           average_statistics_over_predictions=True,
                           initial_parameters=getInitialParametersFromDict(
                               init_p_dict, 5, 'batchnorm'))
    p_dict = addParametersToDict(layer, 5, p_dict)
    layer = LayerActivationSign(layer)
    layer = LayerLocalReparameterization(layer, srng)
    layer = LayerFlatten(layer)

    # Layer 7: FC1024
    layer = LayerDropout(layer, 0.4, srng)
    layer = LayerFC(layer, 1024, rng, srng,
                    weight_type=weight_type,
                    weight_parameterization='logits',
                    weight_initialization_method='probability',
                    regularizer='shayer',
                    regularizer_weight=regularization_weight,
                    logit_bounds=(-5., 5.),
                    initial_parameters=getInitialParametersFromDict(
                        init_p_dict, 6, 'linearforward'))
    p_dict = addParametersToDict(layer, 6, p_dict)
    layer = LayerBatchnorm(layer, alpha=0.1,
                           average_statistics_over_predictions=True,
                           initial_parameters=getInitialParametersFromDict(
                               init_p_dict, 6, 'batchnorm'))
    p_dict = addParametersToDict(layer, 6, p_dict)
    layer = LayerActivationSign(layer)
    layer = LayerLocalReparameterization(layer, srng)

    # Layer 8: FC10
    layer = LayerFC(layer, 10, rng, srng,
                    weight_type=weight_type,
                    weight_parameterization='logits',
                    weight_initialization_method='probability',
                    regularizer='shayer',
                    regularizer_weight=regularization_weight,
                    enable_bias=True,
                    bias_type='real',
                    enable_activation_normalization=True,
                    logit_bounds=(-5., 5.),
                    initial_parameters=getInitialParametersFromDict(
                        init_p_dict, 7, 'linearforward'))
    p_dict = addParametersToDict(layer, 7, p_dict)
    layer = LayerLocalReparameterization(layer, srng)
    layer = LayerOutput(layer, 10, objective='crossentropy')
    return layer, p_dict
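
# Illustrative sketch: restore stored parameter values into the shared
# variables returned by getSvhnModel, e.g. to resume training or to evaluate
# a trained ternary model. The helper name and the .npz file name are
# hypothetical placeholders.
def _exampleRestoreSvhnParameters(init_p_dict):
    import numpy as np
    from theano.sandbox.rng_mrg import MRG_RandomStreams

    rng = np.random.RandomState(1234)
    srng = MRG_RandomStreams(seed=5678)
    layer, p_dict = getSvhnModel(init_p_dict, rng, srng)

    # Overwrite the freshly initialized shared variables with stored values.
    stored = np.load('svhn_ternary_params.npz')
    for name, shared in p_dict.items():
        shared.set_value(stored[name])
    return layer, p_dict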