import numpy as np

from blocks.bricks import MLP, Linear, Rectifier, Tanh, Logistic, Identity
from blocks.bricks.conv import (Convolutional, MaxPooling,
                                ConvolutionalSequence)
from blocks.initialization import IsotropicGaussian, Constant


def test_mlp_use_bias_pushed_when_explicitly_specified():
    mlp = MLP(activations=[Tanh(), Tanh(), None], dims=[4, 5, 6, 7],
              prototype=Linear(use_bias=False), use_bias=True)
    mlp.push_allocation_config()
    assert all(lin.use_bias for lin in mlp.linear_transformations)
def test_mlp_use_bias_not_pushed_when_not_explicitly_specified():
    mlp = MLP(activations=[Tanh(), Tanh(), None], dims=[4, 5, 6, 7],
              prototype=Linear(use_bias=False))
    mlp.push_allocation_config()
    assert all(not lin.use_bias for lin in mlp.linear_transformations)
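# A minimal sketch (not part of the test suite) illustrating what the two
# tests above assert: push_allocation_config propagates a use_bias value that
# was explicitly given to the MLP down to its Linear children, while an
# unspecified one leaves the prototype's setting intact. The function name and
# the dimensions here are illustrative.
def demo_use_bias_propagation():
    explicit = MLP(activations=[Tanh(), None], dims=[3, 4, 5],
                   prototype=Linear(use_bias=False), use_bias=True)
    explicit.push_allocation_config()
    # The parent's use_bias=True overrides the prototype's use_bias=False.
    print([lin.use_bias for lin in explicit.linear_transformations])
    # -> [True, True]

    inherited = MLP(activations=[Tanh(), None], dims=[3, 4, 5],
                    prototype=Linear(use_bias=False))
    inherited.push_allocation_config()
    # With no explicit use_bias on the MLP, the prototype's setting survives.
    print([lin.use_bias for lin in inherited.linear_transformations])
    # -> [False, False]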
def build_submodel(image_size,
                   num_channels,
                   L_dim_conv_layers,
                   L_filter_size,
                   L_pool_size,
                   L_activation_conv,
                   L_dim_full_layers,
                   L_activation_full,
                   dropout,
                   prediction,
                   allow_comment=False,
                   sub_dropout=0,  # currently unused
                   L_pool_step=None,
                   L_pool_padding=None):

    # CONVOLUTION
    # Per-layer scale factors; only referenced by commented-out code below.
    params_channels = [10 ** (-i) for i in range(len(L_dim_conv_layers) + 1)]
    params_channels.reverse()
    index_params = 0

    # With no conv section, the flattened input feeds the MLP directly.
    output_dim = num_channels * np.prod(image_size)

    conv_layers = []
    assert len(L_dim_conv_layers) == len(L_filter_size)
    assert len(L_dim_conv_layers) == len(L_pool_size)
    assert len(L_dim_conv_layers) == len(L_activation_conv)

    # Default to unit pooling steps and no padding when not specified.
    if not L_pool_step:
        L_pool_step = [(1, 1)] * len(L_dim_conv_layers)
    if not L_pool_padding:
        L_pool_padding = [(0, 0)] * len(L_dim_conv_layers)

    assert len(L_dim_conv_layers) == len(L_pool_step)
    assert len(L_dim_conv_layers) == len(L_pool_padding)

    # A single dropout value shared by all conv layers for now.
    L_conv_dropout = [dropout] * len(L_dim_conv_layers)

    convnet = None
    mlp = None

    if len(L_dim_conv_layers):
        for (num_filters, filter_size, pool_size, activation_str, dropout,
             index, step, padding) in zip(L_dim_conv_layers,
                                          L_filter_size,
                                          L_pool_size,
                                          L_activation_conv,
                                          L_conv_dropout,
                                          range(len(L_dim_conv_layers)),
                                          L_pool_step,
                                          L_pool_padding):

            # Convert filter_size and pool_size to tuples.
            filter_size = tuple(filter_size)
            if pool_size is None:
                pool_size = (0, 0)
            else:
                pool_size = tuple(pool_size)

            # TODO: leaky relu
            if activation_str.lower() == 'rectifier':
                activation = Rectifier()
            elif activation_str.lower() == 'tanh':
                activation = Tanh()
            elif activation_str.lower() in ['sigmoid', 'logistic']:
                activation = Logistic()
            elif activation_str.lower() in ['id', 'identity']:
                activation = Identity()
            else:
                raise ValueError("unknown activation function: %s"
                                 % activation_str)

            assert 0.0 <= dropout < 1.0
            # Shrink the layer so the expected number of surviving filters
            # under dropout matches the requested width.
            num_filters = num_filters - int(num_filters * dropout)

            layer_conv = Convolutional(filter_size=filter_size,
                                       num_filters=num_filters,
                                       name="layer_%d" % index,
                                       weights_init=IsotropicGaussian(0.01),
                                       biases_init=Constant(0.0))
            conv_layers.append(layer_conv)
            conv_layers.append(activation)
            index_params += 1

            if not (pool_size[0] == 0 and pool_size[1] == 0):
                # pool = MaxPooling(pooling_size=pool_size, step=step,
                #                   padding=padding)
                pool = MaxPooling(pooling_size=pool_size)
                conv_layers.append(pool)

        convnet = ConvolutionalSequence(conv_layers,
                                        num_channels=num_channels,
                                        image_size=image_size,
                                        name="conv_section")
        convnet.push_allocation_config()
        convnet.initialize()
        output_dim = np.prod(convnet.get_dim('output'))

    # MLP
    assert len(L_dim_full_layers) == len(L_activation_full)
    # A single dropout value shared by all fully-connected layers for now.
    L_full_dropout = [dropout] * len(L_dim_full_layers)

    # Regarding batch dropout: the dropout is applied to the filters, which is
    # equivalent to the output dimension, so each layer has to look at the
    # dropout rate of the next layer; that is why the first value of
    # L_exo_dropout_full_layers is thrown away.

    pre_dim = output_dim
    if allow_comment:
        print("When constructing the model, the output_dim of the conv "
              "section is %d." % output_dim)
    activations = []
    dims = [pre_dim]
    if len(L_dim_full_layers):
        for (dim, activation_str, dropout, index) in zip(
                L_dim_full_layers,
                L_activation_full,
                L_full_dropout,
                range(len(L_dim_conv_layers),
                      len(L_dim_conv_layers) + len(L_dim_full_layers))):

            # TODO: leaky relu
            if activation_str.lower() == 'rectifier':
                activation = Rectifier().apply
            elif activation_str.lower() == 'tanh':
                activation = Tanh().apply
            elif activation_str.lower() in ['sigmoid', 'logistic']:
                activation = Logistic().apply
            elif activation_str.lower() in ['id', 'identity']:
                activation = Identity().apply
            else:
                raise ValueError("unknown activation function: %s"
                                 % activation_str)
            activations.append(activation)

            assert 0.0 <= dropout < 1.0
            dim = dim - int(dim * dropout)
            if allow_comment:
                print("When constructing the fully-connected section, we "
                      "apply dropout %f to add an MLP going from pre_dim %d "
                      "to dim %d." % (dropout, pre_dim, dim))
            dims.append(dim)
            pre_dim = dim

    # Now construct the full MLP in one pass. The final layer is a linear
    # (identity) projection to the prediction dimension.
    activations.append(Identity())  # params_channels[index_params]
    dims.append(prediction)

    # A fixed name avoids depending on the loop variable above, which is
    # undefined when both layer lists are empty; cf. "conv_section".
    mlp = MLP(activations=activations,
              dims=dims,
              weights_init=IsotropicGaussian(0.1),
              biases_init=Constant(0.0),
              name="mlp_section")
    mlp.push_allocation_config()
    mlp.initialize()

    return (convnet, mlp)
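# A usage sketch: the hyperparameter values below are illustrative, not taken
# from any experiment. It builds two conv/pool layers followed by one
# fully-connected layer on a 28x28 single-channel input (MNIST-sized), with a
# 10-way prediction layer and no dropout.
if __name__ == '__main__':
    convnet, mlp = build_submodel(
        image_size=(28, 28),
        num_channels=1,
        L_dim_conv_layers=[20, 50],           # filters per conv layer
        L_filter_size=[(5, 5), (5, 5)],
        L_pool_size=[(2, 2), (2, 2)],
        L_activation_conv=['rectifier', 'rectifier'],
        L_dim_full_layers=[500],
        L_activation_full=['tanh'],
        dropout=0.0,
        prediction=10,                        # number of output classes
        allow_comment=True)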