Example #1
    def __init__(
            self, rng, input, n_in, lstm_n_hiddens, mlp_hidden_specs, srng=None,
            lstm_parameters=None,
            mlp_parameters=None, output_type="last", prefix="lstms_mlp", truncate_gradient=-1):
        self.rng = rng
        self.srng = srng
        self.output_type = output_type
        self.input = input
        self.n_in = n_in
        self.lstm_n_hiddens = lstm_n_hiddens
        self.mlp_hidden_specs = mlp_hidden_specs
        self.truncate_gradient = truncate_gradient
        self.layers = []
        self.l2 = 0.

        # Convert the activation entries in the layer specs to the corresponding ops in place
        for layer_spec in self.mlp_hidden_specs:
            mlp.activation_str_to_op(layer_spec)
            
        self.lstms = MultiLayerLSTM(
            self.rng, self.input, self.n_in, self.lstm_n_hiddens,
            parameters=lstm_parameters, output_type=self.output_type,
            prefix=prefix + "_lstms", truncate_gradient=self.truncate_gradient)
        self.lstm_parameters = self.lstms.parameters
        self.l2 += self.lstms.l2
        self.parameters = self.lstms.parameters[:]

        
        # Copy the MLP parameters so that popping them below does not affect the
        # caller's list; layers are initialized from these if provided
        if mlp_parameters is not None:
            mlp_parameters = mlp_parameters[:]

        # these are loop constants that we update and keep track of
        cur_input = self.lstms.output
        cur_n_in = self.lstm_n_hiddens[-1]
        self.mlp_layers = []
        self.mlp_parameters = []
        for i_layer, layer_spec in enumerate(self.mlp_hidden_specs):
            if mlp_parameters is not None:
                W = mlp_parameters.pop(0)
                b = mlp_parameters.pop(0)
            else:
                W = None
                b = None

            layer = mlp.HiddenLayer(
                rng=rng, input=cur_input, d_in=cur_n_in, d_out=layer_spec["units"],
                activation=layer_spec["activation"], W=W, b=b)
            self.mlp_layers.append(layer)
            cur_input = layer.output
            cur_n_in = layer_spec["units"]
            self.mlp_parameters.extend([layer.W, layer.b])
            self.l2 += (layer.W**2).sum()
            
        self.output = cur_input
        self.layers.extend(self.lstms.layers[:])
        self.layers.extend(self.mlp_layers[:])
        self.parameters.extend(self.mlp_parameters[:])
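
The constructor above feeds the output of a MultiLayerLSTM through a stack of fully-connected layers. A minimal usage sketch follows; the enclosing class name `LSTMsMLP` is hypothetical, and the exact symbolic input layout and accepted activation strings are assumptions based on the code above, not shown in the snippet:

import numpy as np
import theano.tensor as T

rng = np.random.RandomState(42)
input = T.tensor3("x")  # sequence input; the exact layout is whatever MultiLayerLSTM expects

# `LSTMsMLP` stands in for the class whose __init__ is shown above
model = LSTMsMLP(
    rng, input, n_in=39, lstm_n_hiddens=[128, 128],
    mlp_hidden_specs=[
        {"units": 256, "activation": "relu"},
        {"units": 64, "activation": "relu"},
        ],
    output_type="last")

# model.output is the symbolic output of the last fully-connected layer;
# model.parameters and model.l2 can be passed to an optimizer / regularizer.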
Example #2
def build_cnn_layers(rng, input, input_shape, conv_layer_specs,
        hidden_layer_specs, srng=None, dropout_rates=None,
        init_W=None, init_b=None):
    """
    Return the layers of a CNN consisting of a number of convolutional layers
    followed by a number of fully-connected hidden layers.

    The convolutional layers are built according to `conv_layer_specs`, a list
    of dicts giving the specifications for each layer. Each dict has the fields
    "filter_shape", "pool_shape" and "activation". The filter shapes are given as
    (n_out_filters, n_in_channels, filter_height, filter_width) while the pool
    shapes are (height, width). As an example, a network with single-channel
    (28, 28) shaped input images with 2 convolutional layers followed by 2
    fully-connected layers could be built using:

        batch_size = 10
        rng = np.random.RandomState(42)
        input = T.matrix("x")
        input_shape=(batch_size, 1, 28, 28)
        conv_layer_specs = [
            {
                "filter_shape": (20, 1, 5, 5), "pool_shape": (2, 2),
                "activation": theano_utils.relu
            }, 
            {
                "filter_shape": (50, 20, 5, 5), "pool_shape": (2, 2)},
                "activation": theano_utils.relu
            }
            ]
        hidden_layer_specs = [
            {"units": 500, "activation": theano_utils.relu},
            {"units": 500, "activation": theano_utils.relu}
            ]
        cnn_layers = build_cnn_layers(
            rng, input, input_shape, conv_layer_specs, hidden_layer_specs
            )

    Parameters
    ----------
    rng : numpy.random.RandomState
        Random number generator used for the random weight initialization of
        the layers.
    srng : Theano symbolic random stream
        Used by the dropout layers; only needed if `dropout_rates` is given.
    input : symbolic tensor
        Input to the first layer of the CNN. The first dimension should be
        across data instances.
    input_shape : (int, int, int, int)
        The shape of the input: (n_data, n_channels, height, width).
    conv_layer_specs : list of dict
        Specifications for the convolutional layers.
    hidden_layer_specs : list of dict
        Specifications for the fully-connected hidden layers.
    dropout_rates : list of float
        The dropout rates for each of the layers (including the convolutional
        layers); if not provided, dropout is not performed.
    init_W : list of shared tensors
        If provided, these weights are used for layer initialization. The
        weights should be given in the same order that the layers are created
        (i.e. first the convolutional weights and then the fully-connected
        hidden layer weights). This is useful for tying weights.
    init_b : list of shared vectors
        If provided, these biases are used for layer initialization. The order
        should be the same as that of `init_W`.
    """

    assert len(conv_layer_specs) > 0, "Use MLP class if no convolutional layers"
    assert (
        dropout_rates is None or 
        len(dropout_rates) == len(conv_layer_specs) + len(hidden_layer_specs)
        )

    conv_layer_specs = copy.deepcopy(conv_layer_specs)
    hidden_layer_specs = copy.deepcopy(hidden_layer_specs)
    for layer_spec in conv_layer_specs:
        mlp.activation_str_to_op(layer_spec)
    for layer_spec in hidden_layer_specs:
        mlp.activation_str_to_op(layer_spec)

    if init_W is not None:
        assert init_b is not None

        # We are going to pop parameters, so make copies
        init_W = init_W[:]
        init_b = init_b[:]

    layers = []
    if dropout_rates is not None:
        dropout_layers = []

    # Build convolutional layers

    for i_layer in xrange(len(conv_layer_specs)):
        if i_layer == 0:
            cur_input_shape = input_shape
            cur_input = input.reshape(input_shape)
        else:
            batch_size, prev_n_in_channels, prev_in_height, prev_in_width = prev_input_shape
            prev_n_out_filters, prev_n_in_channels, prev_filter_height, prev_filter_width = (
                prev_filter_shape
                )
            prev_pool_height, prev_pool_width = prev_pool_shape
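            # Spatial size after the previous layer: floor((in - filter + 1) / pool),
            # i.e. a "valid" convolution followed by max-pooling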
            cur_input_shape = (
                batch_size,
                prev_n_out_filters,
                int(np.floor(1. * (prev_in_height - prev_filter_height + 1) / prev_pool_height)),
                int(np.floor(1. * (prev_in_width - prev_filter_width + 1) / prev_pool_width))
                )
            cur_input = layers[-1].output

        if init_W is not None:
            W = init_W.pop(0)
            b = init_b.pop(0)
        else:
            W = None
            b = None
        cur_activation = conv_layer_specs[i_layer]["activation"]

        layer = ConvMaxPoolLayer(
            rng,
            input=cur_input,
            input_shape=cur_input_shape,
            filter_shape=conv_layer_specs[i_layer]["filter_shape"],
            pool_shape=conv_layer_specs[i_layer]["pool_shape"],
            activation=cur_activation,
            W=W,
            b=b
            )
        layers.append(layer)

        if dropout_rates is not None:
            if i_layer == 0:
                cur_dropout_input = input.reshape(input_shape)
            else:
                cur_dropout_input = dropout_layers[-1].output
            dropout_rate = dropout_rates[i_layer]
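            # The dropout layer shares the plain layer's parameters; W is scaled by
            # 1/(1 - dropout_rate) so that the expected activations of the dropout
            # network match those of the deterministic network used at test time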
            dropout_layer = DropoutConvMaxPoolLayer(
                rng, srng, dropout_rate,
                input=cur_dropout_input,
                input_shape=cur_input_shape,
                filter_shape=conv_layer_specs[i_layer]["filter_shape"],
                pool_shape=conv_layer_specs[i_layer]["pool_shape"],
                activation=cur_activation,
                W=layer.W / (1. - dropout_rate),
                b=layer.b
                )
            dropout_layers.append(dropout_layer)

        # Store shapes for next layer
        prev_input_shape = cur_input_shape
        prev_filter_shape = conv_layer_specs[i_layer]["filter_shape"]
        prev_pool_shape = conv_layer_specs[i_layer]["pool_shape"]

    # Build fully-connected hidden layers

    for i_layer in xrange(len(hidden_layer_specs)):

        if i_layer == 0:
            # Shapes from last convolutional layer
            batch_size, prev_n_in_channels, prev_in_height, prev_in_width = prev_input_shape
            prev_n_out_filters, prev_n_in_channels, prev_filter_height, prev_filter_width = (
                prev_filter_shape
                )
            prev_pool_height, prev_pool_width = prev_pool_shape
            cur_d_in = (
                prev_n_out_filters *
                int(np.floor(1. * (prev_in_height - prev_filter_height + 1) / prev_pool_height)) *
                int(np.floor(1. * (prev_in_width - prev_filter_width + 1) / prev_pool_width))
                )
            cur_input = layers[-1].output.flatten(2)
        else:
            cur_d_in = hidden_layer_specs[i_layer - 1]["units"]
            cur_input = layers[-1].output

        if init_W is not None:
            W = init_W.pop(0)
            b = init_b.pop(0)
        else:
            W = None
            b = None
        cur_activation = hidden_layer_specs[i_layer]["activation"]
        layer = mlp.HiddenLayer(
            rng=rng,
            input=cur_input,
            d_in=cur_d_in,
            d_out=hidden_layer_specs[i_layer]["units"],
            activation=cur_activation,
            W=W,
            b=b
            )
        layers.append(layer)

        if dropout_rates is not None:
            if i_layer == 0:
                cur_dropout_input = dropout_layers[-1].output.flatten(2)
            else:
                cur_dropout_input = dropout_layers[-1].output
            dropout_rate = dropout_rates[len(conv_layer_specs) + i_layer]
            dropout_layer = mlp.DropoutHiddenLayer(
                rng=rng,
                srng=srng,
                dropout_rate=dropout_rate,
                input=cur_dropout_input,
                d_in=cur_d_in,
                d_out=hidden_layer_specs[i_layer]["units"],
                activation=cur_activation,
                W=layer.W / (1. - dropout_rate),
                b=layer.b
                )
            dropout_layers.append(dropout_layer)

    if dropout_rates is not None:
        return (dropout_layers, layers)
    return layers
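
For the docstring example above, the flattened input dimension of the first fully-connected layer can be checked with the same arithmetic `build_cnn_layers` uses internally; `conv_pool_out` below is a small helper written only for this sketch, not part of the codebase:

import numpy as np

def conv_pool_out(size, filter_size, pool_size):
    # Same formula as in build_cnn_layers: "valid" convolution, then pooling
    return int(np.floor(1. * (size - filter_size + 1) / pool_size))

h = w = 28
h, w = conv_pool_out(h, 5, 2), conv_pool_out(w, 5, 2)  # (28 - 5 + 1) / 2 -> 12
h, w = conv_pool_out(h, 5, 2), conv_pool_out(w, 5, 2)  # (12 - 5 + 1) / 2 -> 4
d_in = 50 * h * w  # 50 filters in the last conv layer -> 50 * 4 * 4 = 800
print(d_in)        # 800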
Example #3
def build_cnn_layers(rng,
                     input,
                     input_shape,
                     conv_layer_specs,
                     hidden_layer_specs,
                     srng=None,
                     dropout_rates=None,
                     init_W=None,
                     init_b=None):
    """
    Return the layers of a CNN consisting of a number of convolutional layers
    followed by a number of fully-connected hidden layers.

    The convolutional layers are built according to `conv_layer_specs`, a list
    of dicts giving the specifications for each layer. Each dict has the fields
    "filter_shape", "pool_shape" and "activation". The filter shapes are given as
    (n_out_filters, n_in_channels, filter_height, filter_width) while the pool
    shapes are (height, width). As an example, a network with single-channel
    (28, 28) shaped input images with 2 convolutional layers followed by 2
    fully-connected layers could be built using:

        batch_size = 10
        rng = np.random.RandomState(42)
        input = T.matrix("x")
        input_shape=(batch_size, 1, 28, 28)
        conv_layer_specs = [
            {
                "filter_shape": (20, 1, 5, 5), "pool_shape": (2, 2),
                "activation": theano_utils.relu
            }, 
            {
                "filter_shape": (50, 20, 5, 5), "pool_shape": (2, 2)},
                "activation": theano_utils.relu
            }
            ]
        hidden_layer_specs = [
            {"units": 500, "activation": theano_utils.relu},
            {"units": 500, "activation": theano_utils.relu}
            ]
        cnn_layers = build_cnn_layers(
            rng, input, input_shape, conv_layer_specs, hidden_layer_specs
            )

    Parameters
    ----------
    rng : numpy.random.RandomState
        Random number generator used for the random weight initialization of
        the layers.
    srng : Theano symbolic random stream
        Used by the dropout layers; only needed if `dropout_rates` is given.
    input : symbolic tensor
        Input to the first layer of the CNN. The first dimension should be
        across data instances.
    input_shape : (int, int, int, int)
        The shape of the input: (n_data, n_channels, height, width).
    conv_layer_specs : list of dict
        Specifications for the convolutional layers.
    hidden_layer_specs : list of dict
        Specifications for the fully-connected hidden layers.
    dropout_rates : list of float
        The dropout rates for each of the layers (including the convolutional
        layers); if not provided, dropout is not performed.
    init_W : list of shared tensors
        If provided, these weights are used for layer initialization. The
        weights should be given in the same order that the layers are created
        (i.e. first the convolutional weights and then the fully-connected
        hidden layer weights). This is useful for tying weights.
    init_b : list of shared vectors
        If provided, these biases are used for layer initialization. The order
        should be the same as that of `init_W`.
    """

    assert len(
        conv_layer_specs) > 0, "Use MLP class if no convolutional layers"
    assert (dropout_rates is None or len(dropout_rates)
            == len(conv_layer_specs) + len(hidden_layer_specs))

    conv_layer_specs = copy.deepcopy(conv_layer_specs)
    hidden_layer_specs = copy.deepcopy(hidden_layer_specs)
    for layer_spec in conv_layer_specs:
        mlp.activation_str_to_op(layer_spec)
    for layer_spec in hidden_layer_specs:
        mlp.activation_str_to_op(layer_spec)

    if init_W is not None:
        assert init_b is not None

        # We are going to pop parameters, so make copies
        init_W = init_W[:]
        init_b = init_b[:]

    layers = []
    if dropout_rates is not None:
        dropout_layers = []

    # Build convolutional layers

    for i_layer in xrange(len(conv_layer_specs)):
        if i_layer == 0:
            cur_input_shape = input_shape
            cur_input = input.reshape(input_shape)
        else:
            batch_size, prev_n_in_channels, prev_in_height, prev_in_width = prev_input_shape
            prev_n_out_filters, prev_n_in_channels, prev_filter_height, prev_filter_width = (
                prev_filter_shape)
            prev_pool_height, prev_pool_width = prev_pool_shape
            cur_input_shape = (
                batch_size, prev_n_out_filters,
                int(
                    np.floor(1. * (prev_in_height - prev_filter_height + 1) /
                             prev_pool_height)),
                int(
                    np.floor(1. * (prev_in_width - prev_filter_width + 1) /
                             prev_pool_width)))
            cur_input = layers[-1].output

        if init_W is not None:
            W = init_W.pop(0)
            b = init_b.pop(0)
        else:
            W = None
            b = None
        cur_activation = conv_layer_specs[i_layer]["activation"]

        layer = ConvMaxPoolLayer(
            rng,
            input=cur_input,
            input_shape=cur_input_shape,
            filter_shape=conv_layer_specs[i_layer]["filter_shape"],
            pool_shape=conv_layer_specs[i_layer]["pool_shape"],
            activation=cur_activation,
            W=W,
            b=b)
        layers.append(layer)

        if dropout_rates is not None:
            if i_layer == 0:
                cur_dropout_input = input.reshape(input_shape)
            else:
                cur_dropout_input = dropout_layers[-1].output
            dropout_rate = dropout_rates[i_layer]
            dropout_layer = DropoutConvMaxPoolLayer(
                rng,
                srng,
                dropout_rate,
                input=cur_dropout_input,
                input_shape=cur_input_shape,
                filter_shape=conv_layer_specs[i_layer]["filter_shape"],
                pool_shape=conv_layer_specs[i_layer]["pool_shape"],
                activation=cur_activation,
                W=layer.W / (1. - dropout_rate),
                b=layer.b)
            dropout_layers.append(dropout_layer)

        # Store shapes for next layer
        prev_input_shape = cur_input_shape
        prev_filter_shape = conv_layer_specs[i_layer]["filter_shape"]
        prev_pool_shape = conv_layer_specs[i_layer]["pool_shape"]

    # Build fully-connected hidden layers

    for i_layer in xrange(len(hidden_layer_specs)):

        if i_layer == 0:
            # Shapes from last convolutional layer
            batch_size, prev_n_in_channels, prev_in_height, prev_in_width = prev_input_shape
            prev_n_out_filters, prev_n_in_channels, prev_filter_height, prev_filter_width = (
                prev_filter_shape)
            prev_pool_height, prev_pool_width = prev_pool_shape
            cur_d_in = (prev_n_out_filters * int(
                np.floor(1. * (prev_in_height - prev_filter_height + 1) /
                         prev_pool_height)) * int(
                             np.floor(1. *
                                      (prev_in_width - prev_filter_width + 1) /
                                      prev_pool_width)))
            cur_input = layers[-1].output.flatten(2)
        else:
            cur_d_in = hidden_layer_specs[i_layer - 1]["units"]
            cur_input = layers[-1].output

        if init_W is not None:
            W = init_W.pop(0)
            b = init_b.pop(0)
        else:
            W = None
            b = None
        cur_activation = hidden_layer_specs[i_layer]["activation"]
        layer = mlp.HiddenLayer(rng=rng,
                                input=cur_input,
                                d_in=cur_d_in,
                                d_out=hidden_layer_specs[i_layer]["units"],
                                activation=cur_activation,
                                W=W,
                                b=b)
        layers.append(layer)

        if dropout_rates is not None:
            if i_layer == 0:
                cur_dropout_input = dropout_layers[-1].output.flatten(2)
            else:
                cur_dropout_input = dropout_layers[-1].output
            dropout_rate = dropout_rates[len(conv_layer_specs) + i_layer]
            dropout_layer = mlp.DropoutHiddenLayer(
                rng=rng,
                srng=srng,
                dropout_rate=dropout_rate,
                input=cur_dropout_input,
                d_in=cur_d_in,
                d_out=hidden_layer_specs[i_layer]["units"],
                activation=cur_activation,
                W=layer.W / (1. - dropout_rate),
                b=layer.b)
            dropout_layers.append(dropout_layer)

    if dropout_rates is not None:
        return (dropout_layers, layers)
    return layers
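
When `dropout_rates` is given, the function returns a pair: the dropout layers (typically used to build the training cost) and the deterministic layers that share their weights (typically used for evaluation). A hedged usage sketch, reusing the docstring's specs and assuming that `srng` is a standard Theano RandomStreams object and that `theano_utils.relu` is importable from the surrounding codebase:

import numpy as np
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

rng = np.random.RandomState(42)
srng = RandomStreams(seed=42)
input = T.matrix("x")
input_shape = (10, 1, 28, 28)

conv_layer_specs = [
    {"filter_shape": (20, 1, 5, 5), "pool_shape": (2, 2), "activation": theano_utils.relu},
    {"filter_shape": (50, 20, 5, 5), "pool_shape": (2, 2), "activation": theano_utils.relu},
    ]
hidden_layer_specs = [{"units": 500, "activation": theano_utils.relu}]

# One dropout rate per layer: two convolutional layers plus one hidden layer
dropout_layers, layers = build_cnn_layers(
    rng, input, input_shape, conv_layer_specs, hidden_layer_specs,
    srng=srng, dropout_rates=[0.2, 0.2, 0.5])

train_output = dropout_layers[-1].output  # dropout applied; use for the training cost
test_output = layers[-1].output           # no dropout; use for evaluation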