def __init__(self, rng, input, n_in, lstm_n_hiddens, mlp_hidden_specs,
        srng=None, lstm_parameters=None, mlp_parameters=None,
        output_type="last", prefix="lstms_mlp", truncate_gradient=-1):

    self.rng = rng
    self.srng = srng
    self.output_type = output_type
    self.input = input
    self.n_in = n_in
    self.lstm_n_hiddens = lstm_n_hiddens
    self.mlp_hidden_specs = mlp_hidden_specs
    self.truncate_gradient = truncate_gradient
    self.layers = []
    self.l2 = 0.

    for layer_spec in mlp_hidden_specs:
        mlp.activation_str_to_op(layer_spec)

    self.lstms = MultiLayerLSTM(
        self.rng, self.input, self.n_in, self.lstm_n_hiddens,
        parameters=lstm_parameters, output_type=self.output_type,
        prefix=prefix + "_lstms", truncate_gradient=self.truncate_gradient
        )
    self.lstm_parameters = self.lstms.parameters
    self.l2 += self.lstms.l2
    self.parameters = self.lstms.parameters[:]

    # Copy the MLP parameters so that we can pop them off as each layer is
    # initialized
    if mlp_parameters is not None:
        mlp_parameters = mlp_parameters[:]

    # These are loop variables that we update as the layers are built
    cur_input = self.lstms.output
    cur_n_in = self.lstm_n_hiddens[-1]

    self.mlp_layers = []
    self.mlp_parameters = []
    for i_layer, layer_spec in enumerate(self.mlp_hidden_specs):
        if mlp_parameters is not None:
            W = mlp_parameters.pop(0)
            b = mlp_parameters.pop(0)
        else:
            W = None
            b = None
        layer = mlp.HiddenLayer(
            rng=rng, input=cur_input, d_in=cur_n_in,
            d_out=layer_spec["units"], activation=layer_spec["activation"],
            W=W, b=b
            )
        self.mlp_layers.append(layer)
        cur_input = layer.output
        cur_n_in = layer_spec["units"]
        self.mlp_parameters.extend([layer.W, layer.b])
        self.l2 += (layer.W**2).sum()

    self.output = cur_input
    self.layers.extend(self.lstms.layers[:])
    self.layers.extend(self.mlp_layers[:])
    self.parameters.extend(self.mlp_parameters[:])
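# Usage sketch (illustrative, with assumptions): the enclosing class of the
# ``__init__`` above is not visible in this excerpt, so ``LSTMsMLP`` below is
# an assumed name for it, and the string-valued "activation" entry assumes
# that ``mlp.activation_str_to_op`` converts strings such as "tanh" to ops.
#
#     rng = np.random.RandomState(42)
#     x = T.tensor3("x")  # LSTM input, e.g. (n_timesteps, batch_size, n_in)
#     model = LSTMsMLP(
#         rng, x, n_in=39, lstm_n_hiddens=[256, 256],
#         mlp_hidden_specs=[{"units": 128, "activation": "tanh"}]
#         )
#     # model.output: output of the final hidden layer on top of the LSTMs
#     # model.parameters: all LSTM and MLP parameters, ready for an optimizer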
def build_cnn_layers(rng, input, input_shape, conv_layer_specs,
        hidden_layer_specs, srng=None, dropout_rates=None, init_W=None,
        init_b=None):
    """
    Return the layers of a CNN consisting of a number of convolutional
    layers followed by a number of fully-connected hidden layers.

    The convolutional layers are built according to `conv_layer_specs`, a
    list of dicts giving the specifications for each layer. Each dict has
    fields "filter_shape", "pool_shape" and "activation". The filter shapes
    are given as (n_out_filters, n_in_channels, filter_height, filter_width)
    while the pool shapes are (height, width).

    As an example, a network with single-channel (28, 28) shaped input
    images with 2 convolutional layers followed by 2 fully-connected layers
    could be built using:

        batch_size = 10
        rng = np.random.RandomState(42)
        input = T.matrix("x")
        input_shape = (batch_size, 1, 28, 28)
        conv_layer_specs = [
            {"filter_shape": (20, 1, 5, 5), "pool_shape": (2, 2),
            "activation": theano_utils.relu},
            {"filter_shape": (50, 20, 5, 5), "pool_shape": (2, 2),
            "activation": theano_utils.relu},
            ]
        hidden_layer_specs = [
            {"units": 500, "activation": theano_utils.relu},
            {"units": 500, "activation": theano_utils.relu}
            ]
        cnn_layers = build_cnn_layers(
            rng, input, input_shape, conv_layer_specs, hidden_layer_specs
            )

    Parameters
    ----------
    rng : numpy.random.RandomState
        Random number generator used for parameter initialization.
    input : symbolic tensor
        Input to the first layer of the CNN. The first dimension should be
        across data instances.
    input_shape : (int, int, int, int)
        The shape of the input: (n_data, n_channels, height, width).
    conv_layer_specs : list of dict
        Specifications for the convolutional layers.
    hidden_layer_specs : list of dict
        Specifications for the fully-connected hidden layers.
    srng : Theano shared random stream
        Used for dropout; required if `dropout_rates` is provided.
    dropout_rates : list of float
        The dropout rates for each of the layers (including the
        convolutional layers); if not provided, dropout is not performed.
    init_W : list of shared tensors
        If provided, these weights are used for layer initialization. The
        weights should be given in the same order that the layers are
        created (i.e. first the convolutional weights and then the
        fully-connected hidden layer weights). This is useful for tying
        weights.
    init_b : list of shared vectors
        If provided, these biases are used for layer initialization. The
        order should be the same as that of `init_W`.

    If `dropout_rates` is provided, a tuple (dropout_layers, layers) is
    returned, where the dropout layers share appropriately scaled weights
    with the corresponding non-dropout layers; otherwise only the list of
    layers is returned.
    """

    assert len(conv_layer_specs) > 0, "Use MLP class if no convolutional layers"
    assert (
        dropout_rates is None or
        len(dropout_rates) == len(conv_layer_specs) + len(hidden_layer_specs)
        )

    conv_layer_specs = copy.deepcopy(conv_layer_specs)
    hidden_layer_specs = copy.deepcopy(hidden_layer_specs)
    for layer_spec in conv_layer_specs:
        mlp.activation_str_to_op(layer_spec)
    for layer_spec in hidden_layer_specs:
        mlp.activation_str_to_op(layer_spec)

    if init_W is not None:
        assert init_b is not None
        # We are going to pop parameters, so make copies
        init_W = init_W[:]
        init_b = init_b[:]

    layers = []
    if dropout_rates is not None:
        dropout_layers = []

    # Build convolutional layers
    for i_layer in xrange(len(conv_layer_specs)):
        if i_layer == 0:
            cur_input_shape = input_shape
            cur_input = input.reshape(input_shape)
        else:
            batch_size, prev_n_in_channels, prev_in_height, prev_in_width = (
                prev_input_shape
                )
            prev_n_out_filters, prev_n_in_channels, prev_filter_height, \
                prev_filter_width = prev_filter_shape
            prev_pool_height, prev_pool_width = prev_pool_shape
            cur_input_shape = (
                batch_size,
                prev_n_out_filters,
                int(np.floor(
                    1.*(prev_in_height - prev_filter_height + 1) /
                    prev_pool_height
                    )),
                int(np.floor(
                    1.*(prev_in_width - prev_filter_width + 1) /
                    prev_pool_width
                    ))
                )
            cur_input = layers[-1].output
        if init_W is not None:
            W = init_W.pop(0)
            b = init_b.pop(0)
        else:
            W = None
            b = None
        cur_activation = conv_layer_specs[i_layer]["activation"]
        layer = ConvMaxPoolLayer(
            rng,
            input=cur_input,
            input_shape=cur_input_shape,
            filter_shape=conv_layer_specs[i_layer]["filter_shape"],
            pool_shape=conv_layer_specs[i_layer]["pool_shape"],
            activation=cur_activation,
            W=W,
            b=b
            )
        layers.append(layer)
        if dropout_rates is not None:
            if i_layer == 0:
                cur_dropout_input = input.reshape(input_shape)
            else:
                cur_dropout_input = dropout_layers[-1].output
            dropout_rate = dropout_rates[i_layer]
            dropout_layer = DropoutConvMaxPoolLayer(
                rng, srng, dropout_rate,
                input=cur_dropout_input,
                input_shape=cur_input_shape,
                filter_shape=conv_layer_specs[i_layer]["filter_shape"],
                pool_shape=conv_layer_specs[i_layer]["pool_shape"],
                activation=cur_activation,
                W=layer.W / (1. - dropout_rate),
                b=layer.b
                )
            dropout_layers.append(dropout_layer)
        # Store shapes for the next layer
        prev_input_shape = cur_input_shape
        prev_filter_shape = conv_layer_specs[i_layer]["filter_shape"]
        prev_pool_shape = conv_layer_specs[i_layer]["pool_shape"]

    # Build fully-connected hidden layers
    for i_layer in xrange(len(hidden_layer_specs)):
        if i_layer == 0:
            # Shapes from the last convolutional layer
            batch_size, prev_n_in_channels, prev_in_height, prev_in_width = (
                prev_input_shape
                )
            prev_n_out_filters, prev_n_in_channels, prev_filter_height, \
                prev_filter_width = prev_filter_shape
            prev_pool_height, prev_pool_width = prev_pool_shape
            cur_d_in = (
                prev_n_out_filters *
                int(np.floor(
                    1.*(prev_in_height - prev_filter_height + 1) /
                    prev_pool_height
                    )) *
                int(np.floor(
                    1.*(prev_in_width - prev_filter_width + 1) /
                    prev_pool_width
                    ))
                )
            cur_input = layers[-1].output.flatten(2)
        else:
            cur_d_in = hidden_layer_specs[i_layer - 1]["units"]
            cur_input = layers[-1].output
        if init_W is not None:
            W = init_W.pop(0)
            b = init_b.pop(0)
        else:
            W = None
            b = None
        cur_activation = hidden_layer_specs[i_layer]["activation"]
        layer = mlp.HiddenLayer(
            rng=rng,
            input=cur_input,
            d_in=cur_d_in,
            d_out=hidden_layer_specs[i_layer]["units"],
            activation=cur_activation,
            W=W,
            b=b
            )
        layers.append(layer)
        if dropout_rates is not None:
            if i_layer == 0:
                cur_dropout_input = dropout_layers[-1].output.flatten(2)
            else:
                cur_dropout_input = dropout_layers[-1].output
            dropout_rate = dropout_rates[len(conv_layer_specs) + i_layer]
            dropout_layer = mlp.DropoutHiddenLayer(
                rng=rng,
                srng=srng,
                dropout_rate=dropout_rate,
                input=cur_dropout_input,
                d_in=cur_d_in,
                d_out=hidden_layer_specs[i_layer]["units"],
                activation=cur_activation,
                W=layer.W / (1. - dropout_rate),
                b=layer.b
                )
            dropout_layers.append(dropout_layer)

    if dropout_rates is not None:
        return (dropout_layers, layers)
    return layers
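# Usage sketch (illustrative): the same network as in the docstring example
# above, but with dropout on every layer. ``MRG_RandomStreams`` is Theano's
# standard shared random stream; the dropout rates are arbitrary example
# values, one per layer (two convolutional, two fully-connected).
#
#     from theano.sandbox.rng_mrg import MRG_RandomStreams
#     srng = MRG_RandomStreams(seed=42)
#     dropout_layers, layers = build_cnn_layers(
#         rng, input, input_shape, conv_layer_specs, hidden_layer_specs,
#         srng=srng, dropout_rates=[0.2, 0.2, 0.5, 0.5]
#         )
#     # Typically one trains using dropout_layers[-1].output and evaluates
#     # with layers[-1].output; the two stacks share (rescaled) weights.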
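def _conv_maxpool_output_shape(in_height, in_width, filter_shape, pool_shape):
    """
    Illustrative helper (not part of the original module): the output shape
    arithmetic used in `build_cnn_layers` above. A valid convolution shrinks
    each spatial side by (filter_size - 1), and max-pooling then divides by
    the pool size, taking the floor.
    """
    n_out_filters, _, filter_height, filter_width = filter_shape
    pool_height, pool_width = pool_shape
    out_height = (in_height - filter_height + 1) // pool_height
    out_width = (in_width - filter_width + 1) // pool_width
    return (n_out_filters, out_height, out_width)

# For the docstring example: a (28, 28) input through a (20, 1, 5, 5) filter
# with (2, 2) pooling gives (20, 12, 12); the second conv layer then gives
# (50, 4, 4), so the first fully-connected layer sees 50 * 4 * 4 = 800 inputs.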