    def __init__(self, input_size=28 * 28, hidden_size=1000, noise_level=0.4):
        """
        Build the symbolic (Theano) graph of a tied-weight denoising autoencoder.

        The defaults are the values that used to be hard-coded for MNIST images, so constructing
        the model without arguments yields the same graph as before.

        Parameters
        ----------
        input_size : int
            Dimensionality of one flattened input example (28 * 28 for MNIST images).
        hidden_size : int
            Number of hidden units - generally bigger than the input size for a DAE.
        noise_level : float
            The `noise_level` handed to `salt_and_pepper` when corrupting the training input.

        Notes
        -----
        Sets `self.inputs`, `self.params`, `self.train_cost` (cost of reconstructing the clean
        input from its corrupted version) and `self.recon_predict` (reconstruction computed from
        the uncorrupted input).
        """
        # Symbolic input to the model (Theano).
        # We use a matrix rather than a vector so that minibatch processing can be done in parallel.
        x = T.matrix("X")
        self.inputs = [x]

        # Build the model's parameters - a weight matrix and two bias vectors
        W = get_weights_uniform(shape=(input_size, hidden_size), name="W")
        b0 = get_bias(shape=input_size, name="b0")  # bias added when reconstructing the input
        b1 = get_bias(shape=hidden_size, name="b1")  # bias added when computing the hiddens
        self.params = [W, b0, b1]

        def reconstruct(visible):
            # the encoding function: run the hidden layer given the (possibly corrupted) inputs
            hiddens = tanh(T.dot(visible, W) + b1)
            # the decoding function: the weights are tied, so W.T maps the hiddens back
            return sigmoid(T.dot(hiddens, W.T) + b0)

        # Training path: add noise (corrupt) the input, then reconstruct from the corrupted version.
        corrupted_input = salt_and_pepper(input=x, noise_level=noise_level)
        # the training cost is reconstruction error against the *clean* input -
        # with MNIST this is binary cross-entropy
        self.train_cost = binary_crossentropy(output=reconstruct(corrupted_input), target=x)

        # Prediction path: when using real-world data in predictions, we wouldn't corrupt the
        # input first, so build a second reconstruction expression without the noise.
        # (This is still a symbolic expression; nothing is compiled here.)
        self.recon_predict = reconstruct(x)
    def __init__(self, input_size=28*28, hidden_size=1000):
        """
        Build the symbolic (Theano) graph of a tied-weight denoising autoencoder that corrupts
        its training input with `salt_and_pepper_custom`.

        The defaults are the values that used to be hard-coded for MNIST images, so constructing
        the model without arguments yields the same graph as before.

        Parameters
        ----------
        input_size : int
            Dimensionality of one flattened input example (28*28 for MNIST images).
        hidden_size : int
            Number of hidden units - generally bigger than the input size for a DAE.

        Notes
        -----
        Sets `self.inputs`, `self.params`, `self.train_cost` and `self.recon_predict`.
        """
        # Now, define the symbolic input to the model (Theano)
        # We use a matrix rather than a vector so that minibatch processing can be done in parallel.
        x = T.matrix("X")
        self.inputs = [x]

        # Build the model's parameters - a weight matrix and two bias vectors
        W = get_weights_uniform(shape=(input_size, hidden_size), name="W")
        b0 = get_bias(shape=input_size, name="b0")
        b1 = get_bias(shape=hidden_size, name="b1")
        self.params = [W, b0, b1]

        def encode_decode(visible):
            # hidden layer given the inputs (the encoding function) ...
            hiddens = tanh(T.dot(visible, W) + b1)
            # ... and the reconstruction from the hidden layer (weights tied through W.T)
            return sigmoid(T.dot(hiddens, W.T) + b0)

        # Perform the computation for a denoising autoencoder!
        # first, add noise (corrupt) the input
        corrupted_input = salt_and_pepper_custom(input=x)
        # then reconstruct from the corrupted input; the training cost is the reconstruction
        # error measured against the clean input - with MNIST this is binary cross-entropy
        reconstruction = encode_decode(corrupted_input)
        self.train_cost = binary_crossentropy(output=reconstruction, target=x)

        # When using real-world data in predictions, we wouldn't corrupt the input first.
        # Therefore, create another version of the reconstruction without adding the noise.
        # (This only builds a symbolic expression; no Theano function is compiled here.)
        self.recon_predict = encode_decode(x)
    # Constructor of a 1-D convolutional layer: it reads the input shape, derives the filter shape,
    # obtains the filter weights W and the bias b, and builds the symbolic output expression.
    # NOTE(review): this excerpt is incomplete. The end of the signature is missing (the body reads
    # bias_init, activation and convolution, none of which appear in the visible parameter list),
    # the docstring delimiters are gone, and several call continuations / branches are cut off.
    # Restore them from the upstream file before relying on this code.
    def __init__(self, inputs=None, params=None, outdir='outputs/conv1d',
                 n_filters=None, filter_size=None, stride=None, border_mode='valid',
                 weights_init='uniform', weights_interval='montreal', weights_mean=0, weights_std=5e-3,
        Initialize a 1-D convolutional layer.

        inputs : tuple(shape, `Theano.TensorType`)
            The dimensionality of the inputs for this model, and the routing information for the model
            to accept inputs from elsewhere. `shape` will be a monad tuple representing known
            sizes for each dimension in the `Theano.TensorType`. Shape of the incoming data:
            (batch_size, num_channels, data_dimensionality). Most likely, your channels
            will be 1. For example, batches of text will be of the form (N, 1, D) where N=examples in minibatch and
            D=dimensionality (chars, words, etc.)
        params : Dict(string_name: theano SharedVariable), optional
            A dictionary of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as siamese networks or pretraining some
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        n_filters : int
            The number of filters to use (convolution kernels).
        filter_size : int
            The size of the convolution filter.
        stride : int
            The distance between the receptive field centers of neighboring units. This is the 'stride' of the
            convolution operation.
        border_mode : str, one of 'valid', 'full', 'same'
            A string indicating the convolution border mode.
            If 'valid', the convolution is only computed where the input and the
            filter fully overlap.
            If 'full', the convolution is computed wherever the input and the
            filter overlap by at least one position.
        weights_init : str
            Determines the method for initializing model weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        activation : str or Callable
            The activation function to apply to the layer. See opendeep.utils.activation for options.
        convolution : str or Callable
            The 1-dimensional convolution implementation to use. The default of 'mc0' is normally fine. See
            opendeep.utils.conv1d_implementations for alternatives. (This is necessary because Theano only
            supports 2D convolutions at the moment).
        mrg : random
            A random number generator that is used when adding noise.
            I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.

        Theano's default convolution function (`theano.tensor.nnet.conv.conv2d`)
        does not support the 'same' border mode by default. This layer emulates
        it by performing a 'full' convolution and then cropping the result, which
        may negatively affect performance.
        # Snapshot the constructor arguments and forward them as keywords to the parent constructor.
        # NOTE(review): locals() also contains 'self' at this point; presumably a line removing it
        # before the super() call is missing from this excerpt - confirm against upstream.
        initial_parameters = locals().copy()
        super(Conv1D, self).__init__(**initial_parameters)
        # NOTE(review): the body of the following `if` is not visible in this excerpt.
        if self.inputs is None:

        # specifications #
        # grab info from the inputs_hook, or from parameters
        # expect input to be in the form (B, C, I) (batch, channel, input data)
        # inputs_hook is a tuple of (Shape, Input)
        # self.inputs is a list of all the input expressions (we enforce only 1, so self.inputs[0] is the input)
        input_shape, self.input = self.inputs[0]
        assert self.input.ndim == 3, "Expected 3D input variable with form (batch, channel, input_data)"
        assert len(input_shape) == 3, "Expected 3D input shape with form (batch, channel, input_data)"

        # the channel count is the second entry of the (batch, channel, input data) shape
        n_channels = input_shape[1]

        # one filter of length filter_size per (output filter, input channel) pair
        filter_shape = (n_filters, n_channels, filter_size)

        # activation function!
        activation_func = get_activation_function(activation)

        # convolution function!
        convolution_func = get_conv1d_function(convolution)

        # NOTE(review): the arguments of this call are cut off in this excerpt.
        outshape = ConvOp.getOutputShape(
        # output shape = (first input dimension, number of filters) followed by `outshape`
        self.output_size = (input_shape[0], n_filters) + outshape

        # Params #
        # NOTE(review): both `self.params.get(` calls below are cut off; presumably they look up
        # "W" / "b" and fall back to a freshly initialized variable - confirm against upstream.
        W = self.params.get(
                        # if gaussian
                        # if uniform

        b = self.params.get(
            get_bias(shape=(n_filters,), name="b", init_values=bias_init)

        # Finally have the two parameters!
        self.params = OrderedDict([("W", W), ("b", b)])

        # Computational Graph! #
        # NOTE(review): the rest of the convolution call and the branch(es) that lead to the
        # error below are not visible in this excerpt.
        if border_mode in ['valid', 'full']:
            conved = convolution_func(self.input,
            log.error("Invalid border mode: '%s'" % border_mode)
            raise RuntimeError("Invalid border mode: '%s'" % border_mode)

        # dimshuffle('x', 0, 'x') makes the per-filter bias broadcastable over the first and last
        # axes of `conved` before the activation is applied
        self.output = activation_func(conved + b.dimshuffle('x', 0, 'x'))
# File: rnn_rbm.py  Project: 52nlp/OpenDeep
    # Constructor of the RNN-RBM: it stores the sampling settings, picks the activation and
    # sampling functions, obtains the eight parameters (W, bv, bh, Wuh, Wuv, Wvu, Wuu, bu) and
    # then builds the model graph.
    # NOTE(review): this excerpt is incomplete. The end of the signature is missing (the body reads
    # weights_init, k, rnn_weights_init, rnn_bias_init and generate_n_steps, none of which appear
    # in the visible parameter list), the docstring delimiters are gone, and several `else:`
    # lines and call continuations are cut off. Restore them from the upstream file.
    # NOTE(review): the default `mrg` is created once, when the `def` is evaluated, so every
    # instance that relies on the default shares the same generator object.
    def __init__(self, inputs_hook=None, hiddens_hook=None, params_hook=None, outdir='outputs/rnnrbm/',
                 input_size=None, hidden_size=None,
                 visible_activation='sigmoid', hidden_activation='sigmoid',
                 weights_mean=0, weights_std=5e-3, weights_interval='montreal',
                 bias_init=0, mrg=RNG_MRG.MRG_RandomStreams(1),
                 rnn_hidden_size=None, rnn_hidden_activation='rectifier',
                 rnn_weights_mean=0, rnn_weights_std=5e-3, rnn_weights_interval='montreal',
        Initialize the RNN-RBM.

        inputs_hook : Tuple of (shape, variable)
            Routing information for the model to accept inputs from elsewhere. This is used for linking
            different models together. For now, it needs to include the shape information (normally the
            dimensionality of the input i.e. input_size).
        hiddens_hook : Tuple of (shape, variable)
            Routing information for the model to accept its hidden representation from elsewhere.
            This is used for linking different models together. For now, it needs to include the shape
            information (normally the dimensionality of the hiddens i.e. hidden_size).
        params_hook : List(theano shared variable)
            A list of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables).
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        input_size : int
            The size (dimensionality) of the input to the RBM. If shape is provided in `inputs_hook`, this is optional.
            The :class:`Model` requires an `output_size`, which gets set to this value because the RBM is an
            unsupervised model. The output is a reconstruction of the input.
        hidden_size : int
            The size (dimensionality) of the hidden layer for the RBM.
        visible_activation : str or callable
            The nonlinear (or linear) visible activation to perform after the dot product from hiddens -> visible layer.
            This activation function should be appropriate for the input unit types, i.e. 'sigmoid' for binary inputs.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        hidden_activation : str or callable
            The nonlinear (or linear) hidden activation to perform after the dot product from visible -> hiddens layer.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        weights_init : str
            Determines the method for initializing model weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        mrg : random
            A random number generator that is used when sampling. The RBM is a probabilistic model, so it relies a lot
            on sampling. I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.
        k : int
            The k number of steps used for CD-k or PCD-k with Gibbs sampling. Basically, the number of samples
            generated from the model to train against reconstructing the original input.
        rnn_hidden_size : int
            The number of hidden units (dimensionality) to use in the recurrent layer.
        rnn_hidden_activation : str or Callable
            The activation function to apply to recurrent units. See opendeep.utils.activation for options.
        rnn_weights_init : str
            Determines the method for initializing recurrent weights. See opendeep.utils.nnet for options. 'Identity'
            works well with 'rectifier' `rnn_hidden_activation`.
        rnn_weights_mean : float
            If Gaussian `rnn_weights_init`, the mean value to use.
        rnn_weights_std : float
            If Gaussian `rnn_weights_init`, the standard deviation to use.
        rnn_weights_interval : str or float
            If Uniform `rnn_weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        rnn_bias_init : float
            The initial value to use for the recurrent bias parameter. Most often, the default of 0.0 is preferred.
        generate_n_steps : int
            When generating from the model, how many steps to generate.
        # Forward every constructor argument except `self` to the parent constructor.
        # NOTE(review): dict.iteritems() exists only on Python 2, and `arg is not 'self'` compares
        # identity with a string literal (it only works through string interning); `!=` is the
        # robust comparison.
        super(RNN_RBM, self).__init__(**{arg: val for (arg, val) in locals().iteritems() if arg is not 'self'})

        # specifications #
        self.mrg = mrg
        self.k = k
        self.generate_n_steps = generate_n_steps

        # grab info from the inputs_hook, hiddens_hook, or from parameters
        # NOTE(review): as shown, the two lines after the raise are unreachable; an `else:`
        # introducing them appears to be missing from this excerpt.
        if self.inputs_hook is not None:  # inputs_hook is a tuple of (Shape, Input)
            raise NotImplementedError("Inputs_hook not implemented yet for RNN-RBM")
            # make the input a symbolic matrix - a sequence of inputs
            self.input = T.matrix('Vs')

        # set an initial value for the recurrent hiddens
        self.u0 = T.zeros((rnn_hidden_size,))

        # make a symbolic vector for the initial recurrent hiddens value to use during generation for the model
        self.generate_u0 = T.vector("generate_u0")

        # either grab the hidden's desired size from the parameter directly, or copy n_in
        self.hidden_size = hidden_size or self.input_size

        # deal with hiddens_hook
        if self.hiddens_hook is not None:
            raise NotImplementedError("Hiddens_hook not implemented yet for RNN_RBM")

        # other specifications
        # visible activation function!
        self.visible_activation_func = get_activation_function(visible_activation)

        # make sure the sampling functions are appropriate for the activation functions.
        # NOTE(review): an `else:` before the log.error / raise pair appears to be missing here;
        # as shown, a binary activation would still hit the raise.
        if is_binary(self.visible_activation_func):
            self.visible_sampling = self.mrg.binomial
            # TODO: implement non-binary activation
            log.error("Non-binary visible activation not supported yet!")
            raise NotImplementedError("Non-binary visible activation not supported yet!")

        # hidden activation function!
        self.hidden_activation_func = get_activation_function(hidden_activation)

        # make sure the sampling functions are appropriate for the activation functions.
        # NOTE(review): same missing `else:` as for the visible activation above.
        if is_binary(self.hidden_activation_func):
            self.hidden_sampling = self.mrg.binomial
            # TODO: implement non-binary activation
            log.error("Non-binary hidden activation not supported yet!")
            raise NotImplementedError("Non-binary hidden activation not supported yet!")

        # recurrent hidden activation function!
        self.rnn_hidden_activation_func = get_activation_function(rnn_hidden_activation)

        # symbolic scalar for how many recurrent steps to use during generation from the model
        self.n_steps = T.iscalar("generate_n_steps")

        # parameters - make sure to deal with params_hook! #
        # NOTE(review): the .format( call of the assert message is cut off, and the `else:` that
        # should separate "use params_hook" from "initialize fresh parameters" is not visible.
        if self.params_hook is not None:
            # make sure the params_hook has W (weights matrix) and bh, bv (bias vectors)
            assert len(self.params_hook) == 8, \
                "Expected 8 params (W, bv, bh, Wuh, Wuv, Wvu, Wuu, bu) for RBM, found {0!s}!".format(
            self.W, self.bv, self.bh, self.Wuh, self.Wuv, self.Wvu, self.Wuu, self.bu = self.params_hook
            # RBM weight params
            # NOTE(review): every get_weights( call below is cut off after its `shape` argument.
            self.W = get_weights(weights_init=weights_init,
                                 shape=(self.input_size, self.hidden_size),
                                 # if gaussian
                                 # if uniform
            # RNN weight params
            self.Wuh = get_weights(weights_init=rnn_weights_init,
                                   shape=(rnn_hidden_size, self.hidden_size),
                                   # if gaussian
                                   # if uniform

            self.Wuv = get_weights(weights_init=rnn_weights_init,
                                   shape=(rnn_hidden_size, self.input_size),
                                   # if gaussian
                                   # if uniform

            self.Wvu = get_weights(weights_init=rnn_weights_init,
                                   shape=(self.input_size, rnn_hidden_size),
                                   # if gaussian
                                   # if uniform

            self.Wuu = get_weights(weights_init=rnn_weights_init,
                                   shape=(rnn_hidden_size, rnn_hidden_size),
                                   # if gaussian
                                   # if uniform

            # grab the bias vectors
            # rbm biases
            self.bv = get_bias(shape=self.input_size, name="bv", init_values=bias_init)
            self.bh = get_bias(shape=self.hidden_size, name="bh", init_values=bias_init)
            # rnn bias
            self.bu = get_bias(shape=rnn_hidden_size, name="bu", init_values=rnn_bias_init)

        # Finally have the parameters
        self.params = [self.W, self.bv, self.bh, self.Wuh, self.Wuv, self.Wvu, self.Wuu, self.bu]

        # Create the RNN-RBM graph!
        # NOTE(review): the right-hand side of this assignment is missing from this excerpt.
        self.v_sample, self.cost, self.monitors, self.updates_train, self.v_ts, self.updates_generate, self.u_t = \

        log.info("Initialized an RNN-RBM!")
    # Constructor of a fully-connected (Dense) layer: it resolves the input and output shapes,
    # obtains the weight matrix W and bias vector b, and sets
    # self.output = activation(dot(input, W) + b).
    # NOTE(review): this excerpt is incomplete. The end of the signature is missing (the body reads
    # activation and bias_init, neither of which appears in the visible parameter list), the
    # docstring delimiters are gone, and several `else:` lines and call continuations are cut
    # off. Restore them from the upstream file before relying on this code.
    def __init__(self, inputs=None, outputs=None, params=None, outdir='outputs/basic',
                 weights_init='uniform', weights_mean=0, weights_std=5e-3, weights_interval='montreal',
        Initialize a basic layer.

        inputs : List of [tuple(shape, `Theano.TensorType`)]
            The dimensionality of the inputs for this model, and the routing information for the model
            to accept inputs from elsewhere. `shape` will be a monad tuple representing known
            sizes for each dimension in the `Theano.TensorType`. The length of `shape` should be equal to number of
            dimensions in `Theano.TensorType`, where the shape element is an integer representing the size for its
            dimension, or None if the shape isn't known. For example, if you have a matrix with unknown batch size
            but fixed feature size of 784, `shape` would be: (None, 784). The full form of `inputs` would be:
            [((None, 784), <TensorType(float32, matrix)>)].
        outputs : int
            The dimensionality of the output for this model.
        params : Dict(string_name: theano SharedVariable), optional
            A dictionary of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as siamese networks or pretraining some
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        activation : str or callable
            The activation function to use after the dot product going from input -> output. This can be a string
            representing an option from opendeep.utils.activation, or your own function as long as it is callable.
        weights_init : str
            Determines the method for initializing input -> output weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        mrg : random
            A random number generator that is used when adding noise.
            I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.
        # init Model to combine the defaults and config dictionaries with the initial parameters.
        # NOTE(review): locals() also contains 'self' at this point; presumably a line removing it
        # before the super() call is missing from this excerpt - confirm against upstream.
        initial_parameters = locals().copy()
        super(Dense, self).__init__(**initial_parameters)
        # NOTE(review): the body of the following `if` is not visible in this excerpt.
        if self.inputs is None:

        # specifications #
        if len(self.inputs) > 1:
            raise NotImplementedError("Expected 1 input to Dense, found %d. Please merge inputs before passing "
                                      "to the Dense model!" % len(self.inputs))
        # self.inputs is a list of all the input expressions (we enforce only 1, so self.inputs[0] is the input)
        input_shape, self.input = self.inputs[0]
        # An int shape is expanded to a full shape tuple: None for every leading dimension of the
        # input, followed by the given size.
        # NOTE(review): an `else:` before the second assignment appears to be missing; as shown,
        # the expanded tuple is immediately overwritten by `input_shape`.
        if isinstance(input_shape, int):
            self.input_size = ((None, ) * (self.input.ndim-1)) + (input_shape, )
            self.input_size = input_shape
        assert self.input_size is not None, "Need to specify the shape for the last dimension of the input!"

        # We also only have 1 output
        assert self.output_size is not None, "Need to specify outputs size!"
        out_size = self.output_size[0]
        # An int output size replaces only the last dimension of the input shape.
        # NOTE(review): same missing `else:` as for the input size above.
        if isinstance(out_size, int):
            self.output_size = self.input_size[:-1] + (out_size,)
            self.output_size = out_size

        # activation function!
        activation_func = get_activation_function(activation)

        # parameters - make sure to deal with input dictionary! #
        # NOTE(review): both `self.params.get(` calls below are cut off; presumably they look up
        # "W" / "b" and fall back to a freshly initialized variable - confirm against upstream.
        W = self.params.get(
                        shape=(self.input_size[-1], self.output_size[-1]),
                        # if gaussian
                        # if uniform

        b = self.params.get(
            get_bias(shape=self.output_size[-1], name="b", init_values=bias_init)

        # Finally have the two parameters - weights matrix W and bias vector b. That is all!
        self.params = OrderedDict([("W", W), ("b", b)])

        # computation #
        # Here is the meat of the computation transforming input -> output
        # It simply involves a matrix multiplication of inputs*weights, adding the bias vector, and then passing
        # the result through our activation function (normally something nonlinear such as: max(0, output))
        self.output = activation_func(dot(self.input, W) + b)

        # arguments are passed separately so the logger formats the message lazily
        log.debug("Initialized a basic fully-connected layer with shape %s and activation: %s",
                  str((self.input_size[-1], self.output_size[-1])), str(activation))
    # Constructor of a fully-connected layer using the older hook-based API: it picks the input and
    # target variables, obtains W and b, builds self.output = activation(T.dot(input, W) + b),
    # optionally wraps the output in a noise switch, and defines self.cost against self.target.
    # NOTE(review): this excerpt is incomplete. The whole parameter list is missing (the body reads
    # inputs_hook, params_hook, output_size, activation, cost, cost_args, bias_init, noise,
    # noise_level, mrg and kwargs), the docstring delimiters are gone, and several `else:` lines
    # and call continuations are cut off. Restore them from the upstream file.
    def __init__(
        Initialize a basic layer.

        inputs_hook : Tuple of (shape, variable)
            Routing information for the model to accept inputs from elsewhere. This is used for linking
            different models together. For now, it needs to include the shape information (normally the
            dimensionality of the input i.e. input_size).
        params_hook : List(theano shared variable)
            A list of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as a training model with dropout applied
            to layers and one without for testing, where the parameters are shared between the two.
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        input_size : int
            The size (dimensionality) of the input to the layer. If shape is provided in `inputs_hook`,
            this is optional.
        output_size : int
            The size (dimensionality) of the output from the layer.
        activation : str or callable
            The activation function to use after the dot product going from input -> output. This can be a string
            representing an option from opendeep.utils.activation, or your own function as long as it is callable.
        cost : str or callable
            The cost function to use when training the layer. This should be appropriate for the output type, i.e.
            mse for real-valued outputs, binary cross-entropy for binary outputs, etc.
        cost_args : dict
            Any additional named keyword arguments to pass to the specified `cost_function`.
        weights_init : str
            Determines the method for initializing input -> output weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        noise : str
            What type of noise to use for corrupting the output (if not None). See opendeep.utils.noise
            for options. This should be appropriate for the output activation, i.e. Gaussian for tanh or other
            real-valued activations, etc. Often, you will use 'dropout' here as a regularization in BasicLayers.
        noise_level : float
            The amount of noise to use for the noise function specified by `noise`. This could be the
            standard deviation for gaussian noise, the interval for uniform noise, the dropout amount, etc.
        mrg : random
            A random number generator that is used when adding noise.
            I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.
        # init Model to combine the defaults and config dictionaries with the initial parameters.
        # NOTE(review): locals() also contains 'self' at this point; presumably a line removing it
        # before the super() call is missing from this excerpt - confirm against upstream.
        initial_parameters = locals().copy()
        super(Dense, self).__init__(**initial_parameters)

        # specifications #
        # grab info from the inputs_hook, or from parameters
        # NOTE(review): an `else:` before the T.matrix fallback appears to be missing; as shown,
        # the hooked input is immediately overwritten by a fresh symbolic matrix.
        if inputs_hook is not None:  # inputs_hook is a tuple of (Shape, Input)
            assert len(inputs_hook) == 2, "Expected inputs_hook to be tuple!"  # make sure inputs_hook is a tuple
            self.input = inputs_hook[1]
            # make the input a symbolic matrix
            self.input = T.matrix("X")

        # now that we have the input specs, define the output 'target' variable to be used in supervised training!
        # NOTE(review): `== False` is true only when the key is present and equals False (a missing
        # key yields None); an `else:` before the matrix target appears to be missing, so as shown
        # the int64 vector target is immediately overwritten.
        if kwargs.get("out_as_probs") == False:
            self.target = T.vector("Y", dtype="int64")
            self.target = T.matrix("Y")

        # either grab the output's desired size from the parameter directly, or copy input_size
        self.output_size = self.output_size or self.input_size

        # other specifications
        # activation function!
        activation_func = get_activation_function(activation)
        # cost function!
        cost_func = get_cost_function(cost)
        # fall back to an empty dict so the ** expansion in the cost call below always works
        cost_args = cost_args or dict()

        # parameters - make sure to deal with params_hook! #
        # NOTE(review): the `else:` separating "use params_hook" from "initialize fresh parameters"
        # is not visible, and the get_weights( call is cut off after its `shape` argument.
        if params_hook is not None:
            # make sure the params_hook has W (weights matrix) and b (bias vector)
            assert len(params_hook) == 2, "Expected 2 params (W and b) for Dense, found {0!s}!".format(len(params_hook))
            W, b = params_hook
            W = get_weights(
                shape=(self.input_size, self.output_size),
                # if gaussian
                # if uniform

            # grab the bias vector
            # NOTE(review): this uses the bare name `output_size`, while the weight shape above uses
            # self.output_size (which falls back to self.input_size) - presumably the bias should
            # use self.output_size as well; confirm against upstream.
            b = get_bias(shape=output_size, name="b", init_values=bias_init)

        # Finally have the two parameters - weights matrix W and bias vector b. That is all!
        self.params = [W, b]

        # computation #
        # Here is the meat of the computation transforming input -> output
        # It simply involves a matrix multiplication of inputs*weights, adding the bias vector, and then passing
        # the result through our activation function (normally something nonlinear such as: max(0, output))
        self.output = activation_func(T.dot(self.input, W) + b)

        # Now deal with noise if we added it:
        if noise:
            log.debug("Adding noise switch.")
            # NOTE(review): an `else:` before the second get_noise call appears to be missing; as
            # shown, the noise function built with noise_level is immediately replaced.
            if noise_level is not None:
                noise_func = get_noise(noise, noise_level=noise_level, mrg=mrg)
                noise_func = get_noise(noise, mrg=mrg)
            # apply the noise as a switch!
            # default to apply noise. this is for the cost and gradient functions to be computed later
            # (not sure if the above statement is accurate such that gradient depends on initial value of switch)
            self.switch = sharedX(value=1, name="basiclayer_noise_switch")
            # the shared switch selects between the noisy and the clean output
            self.output = T.switch(self.switch, noise_func(input=self.output), self.output)

        # now to define the cost of the model - use the cost function to compare our output with the target value.
        self.cost = cost_func(output=self.output, target=self.target, **cost_args)

        # NOTE(review): the logging call that owns the two argument lines below is cut off.
            "Initialized a basic fully-connected layer with shape %s and activation: %s",
            str((self.input_size, self.output_size)),
    def __init__(self,
        Initialize a basic layer.

        inputs_hook : Tuple of (shape, variable)
            Routing information for the model to accept inputs from elsewhere. This is used for linking
            different models together. For now, it needs to include the shape information (normally the
            dimensionality of the input i.e. input_size).
        params_hook : List(theano shared variable)
            A list of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as a training model with dropout applied
            to layers and one without for testing, where the parameters are shared between the two.
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        input_size : int
            The size (dimensionality) of the input to the layer. If shape is provided in `inputs_hook`,
            this is optional.
        output_size : int
            The size (dimensionality) of the output from the layer.
        activation : str or callable
            The activation function to use after the dot product going from input -> output. This can be a string
            representing an option from opendeep.utils.activation, or your own function as long as it is callable.
        cost : str or callable
            The cost function to use when training the layer. This should be appropriate for the output type, i.e.
            mse for real-valued outputs, binary cross-entropy for binary outputs, etc.
        cost_args : dict
            Any additional named keyword arguments to pass to the specified `cost_function`.
        weights_init : str
            Determines the method for initializing input -> output weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        noise : str
            What type of noise to use for corrupting the output (if not None). See opendeep.utils.noise
            for options. This should be appropriate for the output activation, i.e. Gaussian for tanh or other
            real-valued activations, etc. Often, you will use 'dropout' here as a regularization in BasicLayers.
        noise_level : float
            The amount of noise to use for the noise function specified by `noise`. This could be the
            standard deviation for gaussian noise, the interval for uniform noise, the dropout amount, etc.
        mrg : random
            A random number generator that is used when adding noise.
            I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.
        # Overview: builds one fully-connected layer whose output is
        # activation(dot(input, W) + b), optionally wrapped in a noise switch, and
        # defines a training cost comparing that output with a symbolic target.
        # init Model to combine the defaults and config dictionaries with the initial parameters.
        # NOTE(review): locals() contains `self` at this point; confirm the parent
        # __init__ tolerates or filters that key when it is expanded with **.
        initial_parameters = locals().copy()
        super(Dense, self).__init__(**initial_parameters)

        # specifications #
        # grab info from the inputs_hook, or from parameters
        if inputs_hook is not None:  # inputs_hook is a tuple of (Shape, Input)
            assert len(
            ) == 2, 'Expected inputs_hook to be tuple!'  # make sure inputs_hook is a tuple
            # the second element of inputs_hook is used as this layer's symbolic input
            self.input = inputs_hook[1]
            # make the input a symbolic matrix
            self.input = T.matrix('X')

        # now that we have the input specs, define the output 'target' variable to be used in supervised training!
        # An int64 vector target is used when `out_as_probs` is explicitly falsy in kwargs.
        # NOTE(review): `== False` is also true for 0; a missing key yields None, which
        # does not match. Confirm whether `is False` was intended.
        if kwargs.get('out_as_probs') == False:
            self.target = T.vector('Y', dtype='int64')
            self.target = T.matrix('Y')

        # either grab the output's desired size from the parameter directly, or copy input_size
        self.output_size = self.output_size or self.input_size

        # other specifications
        # activation function!
        activation_func = get_activation_function(activation)
        # cost function!
        cost_func = get_cost_function(cost)
        # default to an empty dict when no extra cost arguments were supplied
        cost_args = cost_args or dict()

        # parameters - make sure to deal with params_hook! #
        if params_hook is not None:
            # make sure the params_hook has W (weights matrix) and b (bias vector)
            assert len(params_hook) == 2, \
                "Expected 2 params (W and b) for Dense, found {0!s}!".format(len(params_hook))
            # params_hook order is (W, b)
            W, b = params_hook
            W = get_weights(
                shape=(self.input_size, self.output_size),
                # if gaussian
                # if uniform

            # grab the bias vector
            b = get_bias(shape=output_size, name="b", init_values=bias_init)

        # Finally have the two parameters - weights matrix W and bias vector b. That is all!
        self.params = [W, b]

        # computation #
        # Here is the meat of the computation transforming input -> output
        # It simply involves a matrix multiplication of inputs*weights, adding the bias vector, and then passing
        # the result through our activation function (normally something nonlinear such as: max(0, output))
        self.output = activation_func(T.dot(self.input, W) + b)

        # Now deal with noise if we added it:
        if noise:
            log.debug('Adding noise switch.')
            # noise_level is forwarded to get_noise only when one was given
            if noise_level is not None:
                noise_func = get_noise(noise, noise_level=noise_level, mrg=mrg)
                noise_func = get_noise(noise, mrg=mrg)
            # apply the noise as a switch!
            # default to apply noise. this is for the cost and gradient functions to be computed later
            # (not sure if the above statement is accurate such that gradient depends on initial value of switch)
            # the switch is created with value 1, which selects the noisy output below
            self.switch = sharedX(value=1, name="basiclayer_noise_switch")
            self.output = T.switch(self.switch, noise_func(input=self.output),

        # now to define the cost of the model - use the cost function to compare our output with the target value.
        self.cost = cost_func(output=self.output,

            "Initialized a basic fully-connected layer with shape %s and activation: %s",
            str((self.input_size, self.output_size)), str(activation))
    def __init__(self, inputs_hook=None, hiddens_hook=None, params_hook=None, outdir='outputs/gsn/',
                 input_size=None, hidden_size=1000,
                 layers=2, walkbacks=4,
                 visible_activation='sigmoid', hidden_activation='tanh',
                 input_sampling=True, mrg=RNG_MRG.MRG_RandomStreams(1),
                 weights_init='uniform', weights_interval='montreal', weights_mean=0, weights_std=5e-3,
                 cost_function='binary_crossentropy', cost_args=None,
                 add_noise=True, noiseless_h1=True,
                 hidden_noise='gaussian', hidden_noise_level=2, input_noise='salt_and_pepper', input_noise_level=0.4,
                 noise_decay='exponential', noise_annealing=1,
                 image_width=None, image_height=None,
        Initialize a GSN.

        inputs_hook : Tuple of (shape, variable)
            Routing information for the model to accept inputs from elsewhere. This is used for linking
            different models together (e.g. setting the Softmax model's input layer to the DAE's hidden layer gives a
            newly supervised classification model). For now, it needs to include the shape information (normally the
            dimensionality of the input i.e. n_in).
        hiddens_hook : Tuple of (shape, variable)
            Routing information for the model to accept its hidden representation from elsewhere.
            This is used for linking different models together (e.g. setting the DAE model's hidden layers to the RNN's
            output layer gives a generative recurrent model.) For now, it needs to include the shape
            information (normally the dimensionality of the hiddens i.e. n_hidden).
        params_hook : List(theano shared variable)
            A list of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as a training model with dropout applied
            to layers and one without for testing, where the parameters are shared between the two.
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        input_size : int
            The size (dimensionality) of the input to the DAE. If shape is provided in `inputs_hook`, this is optional.
            The :class:`Model` requires an `output_size`, which gets set to this value because the DAE is an
            unsupervised model. The output is a reconstruction of the input.
        hidden_size : int
            The size (dimensionality) of the hidden layer for the DAE. Generally, you want it to be larger than
            `input_size`, which is known as *overcomplete*.
        visible_activation : str or callable
            The nonlinear (or linear) visible activation to perform after the dot product from hiddens -> visible layer.
            This activation function should be appropriate for the input unit types, i.e. 'sigmoid' for binary inputs.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        hidden_activation : str or callable
            The nonlinear (or linear) hidden activation to perform after the dot product from visible -> hiddens layer.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        layers : int
            The number of hidden layers to use.
        walkbacks : int
            The number of walkbacks to perform (the variable K in Bengio's paper above). A walkback is a Gibbs sample
            from the DAE, which means the model generates inputs in sequence, where each generated input is compared
            to the original input to create the reconstruction cost for training. For running the model, the very last
            generated input in the Gibbs chain is used as the output.
        input_sampling : bool
            During walkbacks, whether to sample from the generated input to create a new starting point for the next
            walkback (next step in the Gibbs chain). This generally makes walkbacks more effective by making the
            process more stochastic - more likely to find spurious modes in the model's representation.
        mrg : random
            A random number generator that is used when adding noise into the network and for sampling from the input.
            I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.
        tied_weights : bool
            DAE has two weight matrices - W from input -> hiddens and V from hiddens -> input. This boolean
            determines if V = W.T, which 'ties' V to W and reduces the number of parameters necessary during training.
        weights_init : str
            Determines the method for initializing model weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        cost_function : str or callable
            The function to use when calculating the reconstruction cost of the model. This should be appropriate
            for the type of input, i.e. use 'binary_crossentropy' for binary inputs, or 'mse' for real-valued inputs.
            See opendeep.utils.cost for options. You can also specify your own function, which needs to be callable.
        cost_args : dict
            Any additional named keyword arguments to pass to the specified `cost_function`.
        add_noise : bool
            Whether to add noise (corrupt) the input before passing it through the computation graph during training.
            This should most likely be set to the default of True, because this is a *denoising* autoencoder after all.
        noiseless_h1 : bool
            Whether to not add noise (corrupt) the hidden layer during computation.
        hidden_noise : str
            What type of noise to use for corrupting the hidden layer (if not `noiseless_h1`). See opendeep.utils.noise
            for options. This should be appropriate for the hidden unit activation, i.e. Gaussian for tanh or other
            real-valued activations, etc.
        hidden_noise_level : float
            The amount of noise to use for the noise function specified by `hidden_noise`. This could be the
            standard deviation for gaussian noise, the interval for uniform noise, the dropout amount, etc.
        input_noise : str
            What type of noise to use for corrupting the input before computation (if `add_noise`).
            See opendeep.utils.noise for options. This should be appropriate for the input units, i.e. salt-and-pepper
            for binary units, etc.
        input_noise_level : float
            The amount of noise used to corrupt the input. This could be the masking probability for salt-and-pepper,
            standard deviation for Gaussian, interval for Uniform, etc.
        noise_decay : str or False
            Whether to use `input_noise` scheduling (decay `input_noise_level` during the course of training),
            and if so, the string input specifies what type of decay to use. See opendeep.utils.decay for options.
            Noise decay (known as noise scheduling) effectively helps the DAE learn larger variance features first,
            and then smaller ones later (almost as a kind of curriculum learning). May help it converge faster.
        noise_annealing : float
            The amount to reduce the `input_noise_level` after each training epoch based on the decay function specified
            in `noise_decay`.
        image_width : int
            If the input should be represented as an image, the width of the input image. If not specified, it will be
            close to the square factor of the `input_size`.
        image_height : int
            If the input should be represented as an image, the height of the input image. If not specified, it will be
            close to the square factor of the `input_size`.
        # Overview: records the network specification (noise functions, layer sizes,
        # activations, cost function), collects or creates the weight and bias
        # parameters, and finally builds the computation graph.
        # init Model to combine the defaults and config dictionaries with the initial parameters.
        # NOTE(review): locals() contains `self` at this point; confirm the parent
        # __init__ tolerates or filters that key when it is expanded with **.
        initial_parameters = locals().copy()
        super(GSN, self).__init__(**initial_parameters)

        # when the input should be thought of as an image, either use the specified width and height,
        # or try to make as square as possible.
        if image_height is None and image_width is None:
            (_h, _w) = closest_to_square_factors(self.input_size)
            self.image_width  = _w
            self.image_height = _h
            self.image_height = image_height
            self.image_width = image_width

        # Theano variables and RNG #
        if self.inputs_hook is None:
            self.X = T.matrix('X')
            # inputs_hook is a (shape, input) tuple
            self.X = self.inputs_hook[1]
        # Network specifications #
        # generally, walkbacks should be at least 2*layers
        # NOTE(review): "Generaly" in the two warning messages below is a typo for
        # "Generally"; it is a runtime string, so it is left unchanged here.
        if layers % 2 == 0:
            if walkbacks < 2*layers:
                log.warning('Not enough walkbacks for the layers! Layers is %s and walkbacks is %s. '
                            'Generaly want 2X walkbacks to layers',
                            str(layers), str(walkbacks))
            if walkbacks < 2*layers-1:
                log.warning('Not enough walkbacks for the layers! Layers is %s and walkbacks is %s. '
                            'Generaly want 2X walkbacks to layers',
                            str(layers), str(walkbacks))

        self.add_noise = add_noise
        self.noise_annealing = as_floatX(noise_annealing)  # noise schedule parameter
        # Both noise levels are wrapped with sharedX before being handed to get_noise;
        # presumably so they can be changed later (e.g. by noise scheduling) - confirm.
        # NOTE(review): the default `mrg` in the signature is constructed once, when the
        # function is defined, so every instance relying on the default shares that one
        # RNG object - confirm this is intended.
        self.hidden_noise_level = sharedX(hidden_noise_level, dtype=theano.config.floatX)
        self.hidden_noise = get_noise(name=hidden_noise, noise_level=self.hidden_noise_level, mrg=mrg)
        self.input_noise_level = sharedX(input_noise_level, dtype=theano.config.floatX)
        self.input_noise = get_noise(name=input_noise, noise_level=self.input_noise_level, mrg=mrg)

        self.walkbacks = walkbacks
        self.tied_weights = tied_weights
        self.layers = layers
        self.noiseless_h1 = noiseless_h1
        self.input_sampling = input_sampling
        self.noise_decay = noise_decay

        # if there was a hiddens_hook, unpack the hidden layers in the tensor
        if self.hiddens_hook is not None:
            # the first element of hiddens_hook overrides the hidden_size argument
            hidden_size = self.hiddens_hook[0]
            self.hiddens_flag = True
            self.hiddens_flag = False

        # determine the sizes of each layer in a list.
        #  layer sizes, from h0 to hK (h0 is the visible layer)
        hidden_size = list(raise_to_list(hidden_size))
        # a single hidden size is repeated once per hidden layer
        if len(hidden_size) == 1:
            self.layer_sizes = [self.input_size] + hidden_size * self.layers
            assert len(hidden_size) == self.layers, "Hiddens sizes and number of hidden layers mismatch." + \
                                                    "Hiddens %d and layers %d" % (len(hidden_size), self.layers)
            self.layer_sizes = [self.input_size] + hidden_size

        if self.hiddens_hook is not None:
            self.hiddens = self.unpack_hiddens(self.hiddens_hook[1])

        # Activation functions! #
        # hidden unit activation
        self.hidden_activation = get_activation_function(hidden_activation)
        # Visible layer activation
        self.visible_activation = get_activation_function(visible_activation)
        # make sure the sampling functions are appropriate for the activation functions.
        # binary visible activations are sampled with the RNG's binomial function
        if is_binary(self.visible_activation):
            self.visible_sampling = mrg.binomial
            # TODO: implement non-binary activation
            log.error("Non-binary visible activation not supported yet!")
            raise NotImplementedError("Non-binary visible activation not supported yet!")

        # Cost function
        self.cost_function = get_cost_function(cost_function)
        # default to an empty dict when no extra cost arguments were supplied
        self.cost_args = cost_args or dict()

        # Parameters! #
        # make sure to deal with params_hook!
        if self.params_hook is not None:
            # if tied weights, expect layers*2 + 1 params
            # (the first `layers` entries are taken as weights, the remainder as biases)
            if self.tied_weights:
                assert len(self.params_hook) == 2*layers + 1, \
                    "Tied weights: expected {0!s} params, found {1!s}!".format(2*layers+1, len(self.params_hook))
                self.weights_list = self.params_hook[:layers]
                self.bias_list = self.params_hook[layers:]
            # if untied weights, expect layers*3 + 1 params
            # (the first 2*layers entries are taken as weights, the remainder as biases)
                assert len(self.params_hook) == 3*layers + 1, \
                    "Untied weights: expected {0!s} params, found {1!s}!".format(3*layers+1, len(self.params_hook))
                self.weights_list = self.params_hook[:2*layers]
                self.bias_list = self.params_hook[2*layers:]
        # otherwise, construct our params
            # initialize a list of weights and biases based on layer_sizes for the GSN
            self.weights_list = [get_weights(weights_init=weights_init,
                                             shape=(self.layer_sizes[i], self.layer_sizes[i+1]),
                                             name="W_{0!s}_{1!s}".format(i, i+1),
                                             # if gaussian
                                             # if uniform
                                 for i in range(layers)]
            # add more weights if we aren't tying weights between layers (need to add for higher-lower layers now)
            if not tied_weights:
                                 shape=(self.layer_sizes[i+1], self.layer_sizes[i]),
                                 name="W_{0!s}_{1!s}".format(i+1, i),
                                 # if gaussian
                                 # if uniform
                     for i in reversed(range(layers))]
            # initialize each layer bias to 0's.
            self.bias_list = [get_bias(shape=(self.layer_sizes[i],),
                                       name='b_' + str(i),
                              for i in range(layers+1)]

        # build the params of the model into a list
        # (weights first, then biases - the same order the params_hook slicing above expects)
        self.params = self.weights_list + self.bias_list
        log.debug("gsn params: %s", str(self.params))

        # using the properties, build the computational graph
        self.cost, self.monitors, self.output, self.hiddens = self.build_computation_graph()
    def __init__(self,
        # init Model to combine the defaults and config dictionaries.
        super(ConvPoolLayer, self).__init__(config, defaults)
        # all configuration parameters are now in self.args
        # Overview: resolves each setting from the explicit argument or, failing that,
        # from self.args; creates or adopts the weight/bias parameters for one or two
        # groups; then builds the output graph and optionally applies LRN to it.

        # deal with the inputs coming from inputs_hook - necessary for now to give an input hook
        # inputs_hook is a tuple of (Shape, Input)
        if inputs_hook:
            assert len(
            ) == 2, "expecting inputs_hook to be tuple of (shape, input)"
            self.input_shape = inputs_hook[0] or input_shape or self.args.get(
            self.input = inputs_hook[1]
            self.input_shape = input_shape or self.args.get('input_shape')
            self.input = T.ftensor4("X")

        # layer configuration #
        # activation function!
        activation_name = activation or self.args.get('activation')
        # NOTE(review): `basestring` exists only in Python 2; under Python 3 this name
        # is undefined unless the file provides a compatibility alias - confirm.
        if isinstance(activation_name, basestring):
            self.activation_func = get_activation_function(activation_name)
            self.activation_func = activation_name
            assert callable(
            ), "Activation function either needs to be a string name or callable!"
        # each setting below prefers the explicit argument and falls back to self.args
        self.convolution = convolution or self.args.get('convolution')
        self.filter_shape = filter_shape or self.args.get('filter_shape')
        self.convstride = convstride or self.args.get('convstride')
        self.padsize = padsize or self.args.get('padsize')

        self.poolsize = poolsize or self.args.get('poolsize')
        self.poolstride = poolstride or self.args.get('poolstride')

        # expect image_shape to be bc01!
        # the channel count is read from index 1 of the input shape
        self.channel = self.input_shape[1]

        self.lrn = local_response_normalization or self.args.get(

        # if lib_conv is cudnn, it works only on square images and the grad works only when channel % 16 == 0

        self.group = group or self.args.get('group')
        assert self.group in [
            1, 2
        ], "group argument needs to be 1 or 2 (1 for default conv2d)"

        # convert both shapes to numpy arrays; individual entries are reassigned below
        self.filter_shape = numpy.asarray(self.filter_shape)
        self.input_shape = numpy.asarray(self.input_shape)

        if self.lrn:
            self.lrn_func = cross_channel_normalization_bc01

        # Params - make sure to deal with params_hook! #
        if self.group == 1:
            if params_hook:
                # make sure the params_hook has W and b
                assert len(
                ) == 2, "Expected 2 params (W and b) for ConvPoolLayer, found {0!s}!".format(
                self.W, self.b = params_hook
                self.W = get_weights_gaussian(shape=self.filter_shape,
                self.b = get_bias(shape=self.filter_shape[0],
            self.params = [self.W, self.b]
            # the first two entries of the filter and input shapes are halved here
            # NOTE(review): `/` is true division on Python 3; confirm the shape arrays
            # have an integer dtype so the stored halves stay integral.
            self.filter_shape[0] = self.filter_shape[0] / 2
            self.filter_shape[1] = self.filter_shape[1] / 2

            self.input_shape[0] = self.input_shape[0] / 2
            self.input_shape[1] = self.input_shape[1] / 2
            if params_hook:
                assert len(params_hook) == 4
                # NOTE(review): params_hook is unpacked as (W0, W1, b0, b1), but self.params
                # below is ordered [W0, b0, W1, b1]; feeding one layer's `params` to another
                # layer as params_hook would therefore swap W1 and b0 - verify.
                self.W0, self.W1, self.b0, self.b1 = params_hook
                self.W0 = get_weights_gaussian(shape=self.filter_shape,
                self.W1 = get_weights_gaussian(shape=self.filter_shape,
                self.b0 = get_bias(shape=self.filter_shape[0],
                self.b1 = get_bias(shape=self.filter_shape[0],
            self.params = [self.W0, self.b0, self.W1, self.b1]

        # build appropriate graph for conv. version #
        self.output = self.build_computation_graph()

        # Local Response Normalization (for AlexNet)
        if self.lrn:
            self.output = self.lrn_func(self.output)

        log.debug("conv layer initialized with shape_in: %s",
    def __init__(self, config=None, defaults=_default,
                 inputs_hook=None, params_hook=None,
                 input_size=None, output_size=None,
                 cost=None, cost_args=None,
                 weights_init=None, weights_mean=None, weights_std=None, weights_interval=None,
                 noise=None, noise_level=None, mrg=None,
        # Overview: builds one fully-connected layer whose output is
        # activation(dot(input, W) + b), optionally wrapped in a noise switch, and
        # defines a training cost comparing that output with a symbolic target.
        # init Model to combine the defaults and config dictionaries with the initial parameters.
        # NOTE(review): locals() is passed on without a copy and contains `self`; confirm
        # the parent __init__ tolerates or filters that key when it is expanded with **.
        initial_parameters = locals()
        super(BasicLayer, self).__init__(**initial_parameters)
        # all configuration parameters are now in self!

        # specifications #
        # grab info from the inputs_hook, or from parameters
        if self.inputs_hook is not None:  # inputs_hook is a tuple of (Shape, Input)
            assert len(self.inputs_hook) == 2, 'Expected inputs_hook to be tuple!'  # make sure inputs_hook is a tuple
            # a truthy shape from the hook takes precedence over the input_size setting
            self.input_size = self.inputs_hook[0] or self.input_size
            self.input = self.inputs_hook[1]
            # make the input a symbolic matrix
            self.input = T.fmatrix('X')

        # now that we have the input specs, define the output 'target' variable to be used in supervised training!
        self.target = T.fmatrix('Y')

        # either grab the output's desired size from the parameter directly, or copy n_in
        self.output_size = self.output_size or self.input_size

        # other specifications
        # activation function!
        activation_func = get_activation_function(self.activation)
        # cost function!
        cost_func = get_cost_function(self.cost)

        # parameters - make sure to deal with params_hook! #
        if self.params_hook is not None:
            # make sure the params_hook has W (weights matrix) and b (bias vector)
            assert len(self.params_hook) == 2, \
                "Expected 2 params (W and b) for BasicLayer, found {0!s}!".format(len(self.params_hook))
            # params_hook order is (W, b)
            W, b = self.params_hook
            W = get_weights(weights_init=self.weights_init,
                            shape=(self.input_size, self.output_size),
                            # if gaussian
                            # if uniform

            # grab the bias vector
            b = get_bias(shape=self.output_size, name="b", init_values=self.bias_init)

        # Finally have the two parameters - weights matrix W and bias vector b. That is all!
        self.params = [W, b]

        # computation #
        # Here is the meat of the computation transforming input -> output
        # It simply involves a matrix multiplication of inputs*weights, adding the bias vector, and then passing
        # the result through our activation function (normally something nonlinear such as: max(0, output))
        self.output = activation_func(T.dot(self.input, W) + b)

        # Now deal with noise if we added it:
        if self.noise:
            log.debug('Adding noise switch.')
            # noise_level is forwarded to get_noise only when one was configured
            # NOTE(review): the first call passes noise_level and mrg positionally while the
            # second uses the `mrg` keyword; confirm the positional order matches get_noise.
            if self.noise_level is not None:
                noise_func = get_noise(self.noise, self.noise_level, self.mrg)
                noise_func = get_noise(self.noise, mrg=self.mrg)
            # apply the noise as a switch!
            # default to apply noise. this is for the cost and gradient functions to be computed later
            # (not sure if the above statement is accurate such that gradient depends on initial value of switch)
            # the switch is created with value 1
            self.switch = sharedX(value=1, name="basiclayer_noise_switch")
            self.output = T.switch(self.switch,

        # now to define the cost of the model - use the cost function to compare our output with the target value.
        self.cost = cost_func(output=self.output, target=self.target, **self.cost_args)

        log.debug("Initialized a basic fully-connected layer with shape %s and activation: %s",
                  str((self.input_size, self.output_size)), str(self.activation))
    def __init__(self, inputs_hook=None, hiddens_hook=None, params_hook=None, outdir='outputs/rnnrbm/',
                 input_size=None, hidden_size=None,
                 visible_activation='sigmoid', hidden_activation='sigmoid',
                 weights_mean=0, weights_std=5e-3, weights_interval='montreal',
                 bias_init=0, mrg=RNG_MRG.MRG_RandomStreams(1),
                 rnn_hidden_size=None, rnn_hidden_activation='rectifier',
                 rnn_weights_mean=0, rnn_weights_std=5e-3, rnn_weights_interval='montreal',
        Initialize the RNN-RBM.

        inputs_hook : Tuple of (shape, variable)
            Routing information for the model to accept inputs from elsewhere. This is used for linking
            different models together. For now, it needs to include the shape information (normally the
            dimensionality of the input i.e. input_size).
        hiddens_hook : Tuple of (shape, variable)
            Routing information for the model to accept its hidden representation from elsewhere.
            This is used for linking different models together. For now, it needs to include the shape
            information (normally the dimensionality of the hiddens i.e. hidden_size).
        params_hook : List(theano shared variable)
            A list of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables).
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        input_size : int
            The size (dimensionality) of the input to the RBM. If shape is provided in `inputs_hook`, this is optional.
            The :class:`Model` requires an `output_size`, which gets set to this value because the RBM is an
            unsupervised model. The output is a reconstruction of the input.
        hidden_size : int
            The size (dimensionality) of the hidden layer for the RBM.
        visible_activation : str or callable
            The nonlinear (or linear) visible activation to perform after the dot product from hiddens -> visible layer.
            This activation function should be appropriate for the input unit types, i.e. 'sigmoid' for binary inputs.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        hidden_activation : str or callable
            The nonlinear (or linear) hidden activation to perform after the dot product from visible -> hiddens layer.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        weights_init : str
            Determines the method for initializing model weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        mrg : random
            A random number generator that is used when sampling. The RBM is a probabilistic model, so it relies a lot
            on sampling. I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.
        k : int
            The k number of steps used for CD-k or PCD-k with Gibbs sampling. Basically, the number of samples
            generated from the model to train against reconstructing the original input.
        rnn_hidden_size : int
            The number of hidden units (dimensionality) to use in the recurrent layer.
        rnn_hidden_activation : str or Callable
            The activation function to apply to recurrent units. See opendeep.utils.activation for options.
        rnn_weights_init : str
            Determines the method for initializing recurrent weights. See opendeep.utils.nnet for options. 'Identity'
            works well with 'rectifier' `rnn_hidden_activation`.
        rnn_weights_mean : float
            If Gaussian `rnn_weights_init`, the mean value to use.
        rnn_weights_std : float
            If Gaussian `rnn_weights_init`, the standard deviation to use.
        rnn_weights_interval : str or float
            If Uniform `rnn_weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        rnn_bias_init : float
            The initial value to use for the recurrent bias parameter. Most often, the default of 0.0 is preferred.
        generate_n_steps : int
            When generating from the model, how many steps to generate.
        super(RNN_RBM, self).__init__(**{arg: val for (arg, val) in locals().items() if arg is not 'self'})

        # specifications #
        self.mrg = mrg
        self.k = k
        self.generate_n_steps = generate_n_steps

        # grab info from the inputs_hook, hiddens_hook, or from parameters
        if self.inputs_hook is not None:  # inputs_hook is a tuple of (Shape, Input)
            raise NotImplementedError("Inputs_hook not implemented yet for RNN-RBM")
            # make the input a symbolic matrix - a sequence of inputs
            self.input = T.matrix('Vs')

        # set an initial value for the recurrent hiddens
        self.u0 = T.zeros((rnn_hidden_size,))

        # make a symbolic vector for the initial recurrent hiddens value to use during generation for the model
        self.generate_u0 = T.vector("generate_u0")

        # either grab the hidden's desired size from the parameter directly, or copy n_in
        self.hidden_size = hidden_size or self.input_size

        # deal with hiddens_hook
        if self.hiddens_hook is not None:
            raise NotImplementedError("Hiddens_hook not implemented yet for RNN_RBM")

        # other specifications
        # visible activation function!
        self.visible_activation_func = get_activation_function(visible_activation)

        # make sure the sampling functions are appropriate for the activation functions.
        if is_binary(self.visible_activation_func):
            self.visible_sampling = self.mrg.binomial
            # TODO: implement non-binary activation
            log.error("Non-binary visible activation not supported yet!")
            raise NotImplementedError("Non-binary visible activation not supported yet!")

        # hidden activation function!
        self.hidden_activation_func = get_activation_function(hidden_activation)

        # make sure the sampling functions are appropriate for the activation functions.
        if is_binary(self.hidden_activation_func):
            self.hidden_sampling = self.mrg.binomial
            # TODO: implement non-binary activation
            log.error("Non-binary hidden activation not supported yet!")
            raise NotImplementedError("Non-binary hidden activation not supported yet!")

        # recurrent hidden activation function!
        self.rnn_hidden_activation_func = get_activation_function(rnn_hidden_activation)

        # symbolic scalar for how many recurrent steps to use during generation from the model
        self.n_steps = T.iscalar("generate_n_steps")

        # parameters - make sure to deal with params_hook! #
        if self.params_hook is not None:
            # make sure the params_hook has W (weights matrix) and bh, bv (bias vectors)
            assert len(self.params_hook) == 8, \
                "Expected 8 params (W, bv, bh, Wuh, Wuv, Wvu, Wuu, bu) for RBM, found {0!s}!".format(
            self.W, self.bv, self.bh, self.Wuh, self.Wuv, self.Wvu, self.Wuu, self.bu = self.params_hook
            # RBM weight params
            self.W = get_weights(weights_init=weights_init,
                                 shape=(self.input_size, self.hidden_size),
                                 # if gaussian
                                 # if uniform
            # RNN weight params
            self.Wuh = get_weights(weights_init=rnn_weights_init,
                                   shape=(rnn_hidden_size, self.hidden_size),
                                   # if gaussian
                                   # if uniform

            self.Wuv = get_weights(weights_init=rnn_weights_init,
                                   shape=(rnn_hidden_size, self.input_size),
                                   # if gaussian
                                   # if uniform

            self.Wvu = get_weights(weights_init=rnn_weights_init,
                                   shape=(self.input_size, rnn_hidden_size),
                                   # if gaussian
                                   # if uniform

            self.Wuu = get_weights(weights_init=rnn_weights_init,
                                   shape=(rnn_hidden_size, rnn_hidden_size),
                                   # if gaussian
                                   # if uniform

            # grab the bias vectors
            # rbm biases
            self.bv = get_bias(shape=self.input_size, name="bv", init_values=bias_init)
            self.bh = get_bias(shape=self.hidden_size, name="bh", init_values=bias_init)
            # rnn bias
            self.bu = get_bias(shape=rnn_hidden_size, name="bu", init_values=rnn_bias_init)

        # Finally have the parameters
        self.params = [self.W, self.bv, self.bh, self.Wuh, self.Wuv, self.Wvu, self.Wuu, self.bu]

        # Create the RNN-RBM graph!
        self.v_sample, self.cost, self.monitors, self.updates_train, self.v_ts, self.updates_generate, self.u_t = \

        log.info("Initialized an RNN-RBM!")
    def build_computation_graph(self):
        # Purpose: gather (from `self.params_hook`) or create the RNN parameters, run one `theano.scan`
        # per layer over the input (plus an opposite-direction scan when `self.bidirectional`), then
        # compute the output and the cost. Returns the tuple `(output, hiddens, updates, cost, params)`.
        #
        # NOTE(review): the two prose lines below read like a docstring whose `"""` delimiters are missing,
        # and several statements further down (`else:` lines, call heads such as `get_weights(`/`append(`,
        # scan arguments, closing parentheses) appear to have been dropped from this copy. The method will
        # not parse as shown - restore the missing lines from the upstream source before relying on it.
        Creates the output, hiddens, updates, cost, and parameters for the RNN!

        Output, top-level hiddens, updates, cost, and parameters for the RNN.
        # parameters - make sure to deal with params_hook! #
        if self.params_hook is not None:
            # expect at least W_{x_h}, W_{h_h}, W_{h_y}, b_h, b_y -> this is for single-direction RNN.
            assert len(self.params_hook) >= 3*self.layers+2, \
                "Expected at least {0!s} params for rnn, found {1!s}!".format(3*self.layers+2, len(self.params_hook))
            # params_hook layout (must match the order used to build `params` further down):
            #   [0, layers)          -> W_x_h (one per layer)
            #   [layers, 2*layers)   -> W_h_h (one per layer)
            #   [2*layers, 3*layers) -> b_h   (one per layer)
            #   3*layers             -> W_h_y
            #   3*layers + 1         -> b_y
            W_x_h = self.params_hook[:self.layers]
            W_h_h = self.params_hook[self.layers:2*self.layers]
            b_h   = self.params_hook[2*self.layers:3*self.layers]
            W_h_y = self.params_hook[3*self.layers]
            b_y   = self.params_hook[3*self.layers+1]
            # now the case for extra parameters dealing with a backward pass in addition to forward (bidirectional)
            if self.bidirectional:
                assert len(self.params_hook) >= 4*self.layers+2, \
                    "Expected at least {0!s} params for bidirectional (merging hiddens) rnn, found {1!s}!".format(
                        4*self.layers+2, len(self.params_hook))
                # if we are merging according to DeepSpeech paper, this is all we need in addition for bidirectional.
                # the backward hidden-to-hidden weights occupy the `layers` slots right after b_y.
                W_h_hb = self.params_hook[3*self.layers+2:4*self.layers+2]
        # otherwise, construct our params
        # NOTE(review): an `else:` line appears to be missing here; the indented block below presumably
        # belongs to the "no params_hook" branch - confirm.
            # input-to-hidden (and hidden-to-hidden higher layer) weights
            W_x_h = []
            for l in range(self.layers):
                if l > 0:
                    # NOTE(review): the call heads for both branches are missing in this copy. From the
                    # visible shapes, layers above the first use (hidden_size, hidden_size) and the other
                    # shape, (input_size, hidden_size), presumably belongs to the first layer - confirm.
                                             shape=(self.hidden_size, self.hidden_size),
                                             name="W_%d_%d" % (l, l+1),
                                             # if gaussian
                                             # if uniform
                                             shape=(self.input_size, self.hidden_size),
                                             name="W_%d_%d" % (l, l+1),
                                             # if gaussian
                                             # if uniform
            # hidden-to-hidden same layer weights
            # note these use `self.r_weights_init`, not `self.weights_init`.
            W_h_h = [get_weights(weights_init=self.r_weights_init,
                                 shape=(self.hidden_size, self.hidden_size),
                                 name="W_%d_%d" % (l+1, l+1),
                                 # if gaussian
                                 # if uniform
                     for l in range(self.layers)]
            # hidden-to-output weights
            W_h_y = get_weights(weights_init=self.weights_init,
                                shape=(self.hidden_size, self.output_size),
                                # if gaussian
                                # if uniform
            # hidden bias for each layer
            b_h = [get_bias(shape=(self.hidden_size,),
                            name="b_h_%d" % (l+1),
                   for l in range(self.layers)]
            # output bias
            b_y = get_bias(shape=(self.output_size,),
            # extra parameters necessary for second backward pass on hiddens if this is bidirectional
            if self.bidirectional:
                # hidden-to-hidden same layer backward weights.
                W_h_hb = [get_weights(weights_init=self.r_weights_init,
                                      shape=(self.hidden_size, self.hidden_size),
                                      name="W_%d_%db" % (l+1, l+1),
                                      # if gaussian
                                      # if uniform
                          for l in range(self.layers)]

        # put all the parameters into our list, and make sure it is in the same order as when we try to load
        # them from a params_hook!!!
        params = W_x_h + W_h_h + b_h + [W_h_y] + [b_y]
        if self.bidirectional:
            params += W_h_hb

        # make h_init the right sized tensor
        # zeros shaped like the first element of `self.input` projected through the first layer's weights.
        # only set here when `self.hiddens_hook` is falsy.
        if not self.hiddens_hook:
            self.h_init = T.zeros_like(T.dot(self.input[0], W_x_h[0]))

        # computation #
        hiddens = self.input
        updates = dict()
        # vanilla case! there will be only 1 hidden layer for each depth layer.
        for layer in range(self.layers):
            log.debug("Updating hidden layer %d" % (layer+1))
            # normal case - either forward or just backward!
            # NOTE(review): `updates` is rebound to this scan's result on every layer, replacing the dict
            # created above; the scan call is truncated here, so any merging of updates is not visible.
            hiddens_new, updates = theano.scan(
                non_sequences=[W_x_h[layer], W_h_h[layer], b_h[layer]],
                name="rnn_scan_normal_%d" % layer,

            # bidirectional case - need to add a backward sequential pass to compute new hiddens!
            if self.bidirectional:
                # now do the opposite direction for the scan!
                # shares W_x_h and b_h with the scan above; only the recurrent weights (W_h_hb) differ,
                # and `go_backwards` is the negation of `self.backward`.
                hiddens_opposite, updates_opposite = theano.scan(
                    non_sequences=[W_x_h[layer], W_h_hb[layer], b_h[layer]],
                    go_backwards=(not self.backward),
                    name="rnn_scan_backward_%d" % layer,
                # the two directions are merged by addition.
                hiddens_new = hiddens_new + hiddens_opposite

            # replace the hiddens with the newly computed hiddens (and add noise)!
            hiddens = hiddens_new
            # add noise (like dropout) if we wanted it!
            # NOTE(review): the switch result is assigned to `self.hiddens`, while the next layer and the
            # output below read the local `hiddens`; the statement is truncated here - confirm the intent.
            if self.noise:
                self.hiddens = T.switch(self.noise_switch,

        # now compute the outputs from the leftover (top level) hiddens
        output = self.activation_func(
            T.dot(hiddens, W_h_y) + b_y

        # now to define the cost of the model - use the cost function to compare our output with the target value.
        cost = self.cost_function(output=output, target=self.target, **self.cost_args)

        log.info("Initialized a %s RNN!" % self.direction)
        return output, hiddens, updates, cost, params
    def __init__(self, inputs_hook=None, hiddens_hook=None, params_hook=None, outdir='outputs/rbm/',
                 input_size=None, hidden_size=None,
                 visible_activation='sigmoid', hidden_activation='sigmoid',
                 weights_init='uniform', weights_mean=0, weights_std=5e-3, weights_interval='montreal',
        RBM constructor. Defines the parameters of the model along with
        basic operations for inferring hidden from visible (and vice-versa),
        as well as for performing CD updates.

        inputs_hook : Tuple of (shape, variable)
            Routing information for the model to accept inputs from elsewhere. This is used for linking
            different models together. For now, it needs to include the shape information (normally the
            dimensionality of the input i.e. input_size).
        hiddens_hook : Tuple of (shape, variable)
            Routing information for the model to accept its hidden representation from elsewhere.
            This is used for linking different models together. For now, it needs to include the shape
            information (normally the dimensionality of the hiddens i.e. hidden_size).
        params_hook : List(theano shared variable)
            A list of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables).
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        input_size : int
            The size (dimensionality) of the input to the RBM. If shape is provided in `inputs_hook`, this is optional.
            The :class:`Model` requires an `output_size`, which gets set to this value because the RBM is an
            unsupervised model. The output is a reconstruction of the input.
        hidden_size : int
            The size (dimensionality) of the hidden layer for the RBM.
        visible_activation : str or callable
            The nonlinear (or linear) visible activation to perform after the dot product from hiddens -> visible layer.
            This activation function should be appropriate for the input unit types, i.e. 'sigmoid' for binary inputs.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        hidden_activation : str or callable
            The nonlinear (or linear) hidden activation to perform after the dot product from visible -> hiddens layer.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        weights_init : str
            Determines the method for initializing model weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        mrg : random
            A random number generator that is used when sampling. The RBM is a probabilistic model, so it relies a lot
            on sampling. I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.
        k : int
            The k number of steps used for CD-k or PCD-k with Gibbs sampling. Basically, the number of samples
            generated from the model to train against reconstructing the original input.
        # init Model to combine the defaults and config dictionaries with the initial parameters.
        super(RBM, self).__init__(**{arg: val for (arg, val) in locals().iteritems() if arg is not 'self'})

        # specifications #
        # grab info from the inputs_hook, hiddens_hook, or from parameters
        if inputs_hook is not None:  # inputs_hook is a tuple of (Shape, Input)
            assert len(inputs_hook) == 2, 'Expected inputs_hook to be tuple!'  # make sure inputs_hook is a tuple
            self.input = inputs_hook[1]
            # make the input a symbolic matrix
            self.input = T.matrix('V')

        # either grab the hidden's desired size from the parameter directly, or copy n_in
        hidden_size = hidden_size or self.input_size

        # get the number of steps k
        self.k = k

        # deal with hiddens_hook
        if hiddens_hook is not None:
            # make sure hiddens_hook is a tuple
            assert len(hiddens_hook) == 2, 'Expected hiddens_hook to be tuple!'
            hidden_size = hiddens_hook[0] or hidden_size
            self.hiddens_init = hiddens_hook[1]
            self.hiddens_init = None

        # other specifications
        # visible activation function!
        self.visible_activation_func = get_activation_function(visible_activation)

        # make sure the sampling functions are appropriate for the activation functions.
        if is_binary(self.visible_activation_func):
            self.visible_sampling = mrg.binomial
            # TODO: implement non-binary activation
            log.error("Non-binary visible activation not supported yet!")
            raise NotImplementedError("Non-binary visible activation not supported yet!")

        # hidden activation function!
        self.hidden_activation_func = get_activation_function(hidden_activation)

        # make sure the sampling functions are appropriate for the activation functions.
        if is_binary(self.hidden_activation_func):
            self.hidden_sampling = mrg.binomial
            # TODO: implement non-binary activation
            log.error("Non-binary hidden activation not supported yet!")
            raise NotImplementedError("Non-binary hidden activation not supported yet!")

        # parameters - make sure to deal with params_hook! #
        if params_hook is not None:
            # make sure the params_hook has W (weights matrix) and bh, bv (bias vectors)
            assert len(params_hook) == 3, \
                "Expected 3 params (W, bv, bh) for RBM, found {0!s}!".format(len(params_hook))
            # doesn't matter if bv and bh are vectors or matrices.
            self.W, self.bv, self.bh = params_hook
            hidden_size = self.W.shape[1].eval()
            self.W = get_weights(weights_init=weights_init,
                                 shape=(self.input_size, hidden_size),
                                 # if gaussian
                                 # if uniform

            # grab the bias vectors
            self.bv = get_bias(shape=self.input_size, name="bv", init_values=bias_init)
            self.bh = get_bias(shape=hidden_size, name="bh", init_values=bias_init)

        # Finally have the parameters
        self.params = [self.W, self.bv, self.bh]

        # Create the RBM graph!
        self.cost, self.monitors, self.updates, self.v_sample, self.h_sample = self._build_rbm()

        log.debug("Initialized an RBM shape %s",
                  str((self.input_size, hidden_size)))
    def __init__(self, inputs_hook=None, hiddens_hook=None, params_hook=None, outdir=None,
                 input_size=None, hidden_size=None,
                 layers=2, walkbacks=4,
                 visible_activation='sigmoid', hidden_activation='tanh',
                 input_sampling=True, mrg=RNG_MRG.MRG_RandomStreams(1),
                 weights_init='uniform', weights_interval='montreal', weights_mean=0, weights_std=5e-3,
                 cost_function='binary_crossentropy', cost_args=None,
                 add_noise=True, noiseless_h1=True,
                 hidden_noise='gaussian', hidden_noise_level=2, input_noise='salt_and_pepper', input_noise_level=0.4,
                 noise_decay='exponential', noise_annealing=1,
                 image_width=None, image_height=None,
                 rnn_hidden_size=None, rnn_hidden_activation='rectifier',
                 rnn_weights_mean=0, rnn_weights_std=5e-3, rnn_weights_interval='montreal',
        Initialize an RNN-GSN.

        inputs_hook : Tuple of (shape, variable)
            Routing information for the model to accept inputs from elsewhere. This is used for linking
            different models together (e.g. setting the Softmax model's input layer to the DAE's hidden layer gives a
            newly supervised classification model). For now, it needs to include the shape information (normally the
            dimensionality of the input i.e. n_in).
        hiddens_hook : Tuple of (shape, variable)
            Routing information for the model to accept its hidden representation from elsewhere.
            This is used for linking different models together (e.g. setting the DAE model's hidden layers to the RNN's
            output layer gives a generative recurrent model.) For now, it needs to include the shape
            information (normally the dimensionality of the hiddens i.e. n_hidden).
        params_hook : List(theano shared variable)
            A list of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as a training model with dropout applied
            to layers and one without for testing, where the parameters are shared between the two.
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        input_size : int
            The size (dimensionality) of the input to the DAE. If shape is provided in `inputs_hook`, this is optional.
            The :class:`Model` requires an `output_size`, which gets set to this value because the DAE is an
            unsupervised model. The output is a reconstruction of the input.
        hidden_size : int
            The size (dimensionality) of the hidden layer for the DAE. Generally, you want it to be larger than
            `input_size`, which is known as *overcomplete*.
        layers : int
            The number of hidden layers to use.
        walkbacks : int
            The number of walkbacks to perform (the variable K in Bengio's paper above). A walkback is a Gibbs sample
            from the DAE, which means the model generates inputs in sequence, where each generated input is compared
            to the original input to create the reconstruction cost for training. For running the model, the very last
            generated input in the Gibbs chain is used as the output.
        visible_activation : str or callable
            The nonlinear (or linear) visible activation to perform after the dot product from hiddens -> visible layer.
            This activation function should be appropriate for the input unit types, i.e. 'sigmoid' for binary inputs.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        hidden_activation : str or callable
            The nonlinear (or linear) hidden activation to perform after the dot product from visible -> hiddens layer.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        input_sampling : bool
            During walkbacks, whether to sample from the generated input to create a new starting point for the next
            walkback (next step in the Gibbs chain). This generally makes walkbacks more effective by making the
            process more stochastic - more likely to find spurious modes in the model's representation.
        mrg : random
            A random number generator that is used when adding noise into the network and for sampling from the input.
            I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.
        tied_weights : bool
            DAE has two weight matrices - W from input -> hiddens and V from hiddens -> input. This boolean
            determines if V = W.T, which 'ties' V to W and reduces the number of parameters necessary during training.
        weights_init : str
            Determines the method for initializing model weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        cost_function : str or callable
            The function to use when calculating the reconstruction cost of the model. This should be appropriate
            for the type of input, i.e. use 'binary_crossentropy' for binary inputs, or 'mse' for real-valued inputs.
            See opendeep.utils.cost for options. You can also specify your own function, which needs to be callable.
        cost_args : dict
            Any additional named keyword arguments to pass to the specified `cost_function`.
        add_noise : bool
            Whether to add noise (corrupt) the input before passing it through the computation graph during training.
            This should most likely be set to the default of True, because this is a *denoising* autoencoder after all.
        noiseless_h1 : bool
            Whether to not add noise (corrupt) the hidden layer during computation.
        hidden_noise : str
            What type of noise to use for corrupting the hidden layer (if not `noiseless_h1`). See opendeep.utils.noise
            for options. This should be appropriate for the hidden unit activation, i.e. Gaussian for tanh or other
            real-valued activations, etc.
        hidden_noise_level : float
            The amount of noise to use for the noise function specified by `hidden_noise`. This could be the
            standard deviation for gaussian noise, the interval for uniform noise, the dropout amount, etc.
        input_noise : str
            What type of noise to use for corrupting the input before computation (if `add_noise`).
            See opendeep.utils.noise for options. This should be appropriate for the input units, i.e. salt-and-pepper
            for binary units, etc.
        input_noise_level : float
            The amount of noise used to corrupt the input. This could be the masking probability for salt-and-pepper,
            standard deviation for Gaussian, interval for Uniform, etc.
        noise_decay : str or False
            Whether to use `input_noise` scheduling (decay `input_noise_level` during the course of training),
            and if so, the string input specifies what type of decay to use. See opendeep.utils.decay for options.
            Noise decay (known as noise scheduling) effectively helps the DAE learn larger variance features first,
            and then smaller ones later (almost as a kind of curriculum learning). May help it converge faster.
        noise_annealing : float
            The amount to reduce the `input_noise_level` after each training epoch based on the decay function specified
            in `noise_decay`.
        image_width : int
            If the input should be represented as an image, the width of the input image. If not specified, it will be
            close to the square factor of the `input_size`.
        image_height : int
            If the input should be represented as an image, the height of the input image. If not specified, it will be
            close to the square factor of the `input_size`.
        rnn_hidden_size : int
            The number of hidden units (dimensionality) to use in the recurrent layer.
        rnn_hidden_activation : str or Callable
            The activation function to apply to recurrent units. See opendeep.utils.activation for options.
        rnn_weights_init : str
            Determines the method for initializing recurrent weights. See opendeep.utils.nnet for options. 'Identity'
            works well with 'rectifier' `rnn_hidden_activation`.
        rnn_weights_mean : float
            If Gaussian `rnn_weights_init`, the mean value to use.
        rnn_weights_std : float
            If Gaussian `rnn_weights_init`, the standard deviation to use.
        rnn_weights_interval : str or float
            If Uniform `rnn_weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        rnn_bias_init : float
            The initial value to use for the recurrent bias parameter. Most often, the default of 0.0 is preferred.
        generate_n_steps : int
            When generating from the model, how many steps to generate.
        initial_parameters = locals().copy()
        super(RNN_GSN, self).__init__(**initial_parameters)

        # specifications #
        self.layers = layers
        self.walkbacks = walkbacks
        self.input_sampling = input_sampling
        self.mrg = mrg
        self.tied_weights = tied_weights
        self.noise_decay = noise_decay
        self.noise_annealing = noise_annealing
        self.add_noise = add_noise
        self.noiseless_h1 = noiseless_h1
        self.hidden_noise = hidden_noise
        self.hidden_noise_level = hidden_noise_level
        self.input_noise = input_noise
        self.input_noise_level = input_noise_level
        self.image_width = image_width
        self.image_height = image_height

        # grab info from the inputs_hook, hiddens_hook, or from parameters
        if self.inputs_hook is not None:  # inputs_hook is a tuple of (Shape, Input)
            raise NotImplementedError("Inputs_hook not implemented yet for RNN-GSN")
            # make the input a symbolic matrix - a sequence of inputs
            self.input = T.matrix('Xs')

        # set an initial value for the recurrent hiddens
        self.u0 = T.zeros((rnn_hidden_size,))

        # make a symbolic vector for the initial recurrent hiddens value to use during generation for the model
        self.generate_u0 = T.vector("generate_u0")

        # either grab the hidden's desired size from the parameter directly, or copy n_in
        self.hidden_size = hidden_size or self.input_size

        # deal with hiddens_hook
        if self.hiddens_hook is not None:
            raise NotImplementedError("Hiddens_hook not implemented yet for RNN-GSN")

        # other specifications
        # visible activation function!
        self.visible_activation_func = get_activation_function(visible_activation)

        # make sure the sampling functions are appropriate for the activation functions.
        if is_binary(self.visible_activation_func):
            self.visible_sampling = mrg.binomial
            # TODO: implement non-binary activation
            log.error("Non-binary visible activation not supported yet!")
            raise NotImplementedError("Non-binary visible activation not supported yet!")

        # hidden activation function!
        self.hidden_activation_func = get_activation_function(hidden_activation)

        # recurrent hidden activation function!
        self.rnn_hidden_activation_func = get_activation_function(rnn_hidden_activation)

        # Cost function
        self.cost_function = get_cost_function(cost_function)
        self.cost_args = cost_args

        # symbolic scalar for how many recurrent steps to use during generation from the model
        self.n_steps = T.iscalar("generate_n_steps")

        # determine the sizes of each layer in a list.
        # layer sizes, from h0 to hK (h0 is the visible layer)
        self.layer_sizes = [self.input_size] + [self.hidden_size] * self.layers

        # parameters - make sure to deal with params_hook! #
        if self.params_hook is not None:
            # if tied weights, expect (layers*2 + 1) params for GSN and (int(layers+1)/int(2) + 3) for RNN
            if self.tied_weights:
                expected_num = (2*self.layers + 1) + (int(self.layers+1)/2 + 3)
                assert len(self.params_hook) == expected_num, \
                    "Tied weights: expected {0!s} params, found {1!s}!".format(expected_num, len(self.params_hook))
                gsn_len = (2*self.layers + 1)
                self.weights_list = self.params_hook[:self.layers]
                self.bias_list = self.params_hook[self.layers:gsn_len]

            # if untied weights, expect layers*3 + 1 params
                expected_num = (3*self.layers + 1) + (int(self.layers + 1)/2 + 3)
                assert len(self.params_hook) == expected_num, \
                    "Untied weights: expected {0!s} params, found {1!s}!".format(expected_num, len(self.params_hook))
                gsn_len = (3*self.layers + 1)
                self.weights_list = self.params_hook[:2*self.layers]
                self.bias_list = self.params_hook[2*self.layers:gsn_len]

            rnn_len = gsn_len + int(self.layers + 1) / 2
            self.recurrent_to_gsn_weights_list = self.params_hook[gsn_len:rnn_len]
            self.W_u_u = self.params_hook[rnn_len:rnn_len + 1]
            self.W_x_u = self.params_hook[rnn_len + 1:rnn_len + 2]
            self.recurrent_bias = self.params_hook[rnn_len + 2:rnn_len + 3]

        # otherwise, construct our params
            # initialize a list of weights and biases based on layer_sizes for the GSN
            self.weights_list = [get_weights(weights_init=weights_init,
                                             shape=(self.layer_sizes[i], self.layer_sizes[i + 1]),
                                             name="W_{0!s}_{1!s}".format(i, i + 1),
                                             # if gaussian
                                             # if uniform
                                 for i in range(self.layers)]
            # add more weights if we aren't tying weights between layers (need to add for higher-lower layers now)
            if not self.tied_weights:
                                 shape=(self.layer_sizes[i + 1], self.layer_sizes[i]),
                                 name="W_{0!s}_{1!s}".format(i + 1, i),
                                 # if gaussian
                                 # if uniform
                     for i in reversed(range(self.layers))]
            # initialize each layer bias to 0's.
            self.bias_list = [get_bias(shape=(self.layer_sizes[i],),
                                       name='b_' + str(i),
                              for i in range(self.layers + 1)]

            self.recurrent_to_gsn_weights_list = [
                            shape=(rnn_hidden_size, self.layer_sizes[layer]),
                            # if gaussian
                            # if uniform
                for layer in range(self.layers + 1) if layer % 2 != 0
            self.W_u_u = get_weights(weights_init=rnn_weights_init,
                                     shape=(rnn_hidden_size, rnn_hidden_size),
                                     # if gaussian
                                     #if uniform
            self.W_x_u = get_weights(weights_init=rnn_weights_init,
                                     shape=(self.input_size, rnn_hidden_size),
                                     # if gaussian
                                     # if uniform
            self.recurrent_bias = get_bias(shape=(rnn_hidden_size,),

        # build the params of the model into a list
        self.gsn_params = self.weights_list + self.bias_list
        self.params = self.gsn_params + \
                      self.recurrent_to_gsn_weights_list + \
                      [self.W_u_u, self.W_x_u, self.recurrent_bias]
        log.debug("rnn-gsn params: %s", str(self.params))

        # Create the RNN-GSN graph!
        self.x_sample, self.cost, self.monitors, self.updates_train, self.x_ts, self.updates_generate, self.u_t = \

        log.info("Initialized an RNN-GSN!")
    def __init__(self,
        # NOTE(review): the rest of the parameter list, the closing `):` and any docstring are not
        # present at this point in the file. The body reads config, defaults, inputs_hook, params_hook,
        # input_size, output_size, weights_init, weights_mean, weights_std, weights_interval, bias_init
        # and activation, so those are presumably the parameters - confirm against the original.
        #
        # Purpose (from the code below): set up a fully-connected layer whose output is
        # activation_func(dot(input, W) + b), taking the input and the parameters from hooks when given.
        # init Model to combine the defaults and config dictionaries.
        super(BasicLayer, self).__init__(config, defaults)
        # all configuration parameters are now in self.args

        # specifications #
        # grab info from the inputs_hook, or from parameters
        if inputs_hook:  # inputs_hook is a tuple of (Shape, Input)
            assert len(inputs_hook) == 2  # make sure inputs_hook is a tuple
            input_size = inputs_hook[0] or input_size
            self.input = inputs_hook[1]
            # NOTE(review): the next four lines read like the "no inputs_hook" branch; an `else:` line
            # appears to be missing. As written, self.input from the hook is replaced by a new fmatrix.
            # either grab from the parameter directly or self.args config
            input_size = input_size or self.args.get('input_size')
            # make the input a symbolic matrix
            self.input = T.fmatrix('X')
        # either grab from the parameter directly, self.args config, or copy n_in
        output_size = output_size or self.args.get('output_size') or input_size

        # other specifications
        # NOTE: the `x or self.args.get(...)` pattern used below treats any falsy explicit argument
        # (for example 0 or 0.0) as "not given" and falls back to the configured value.
        weights_init = weights_init or self.args.get('weights_init')
        # for gaussian weights
        mean = weights_mean or self.args.get('weights_mean')
        std = weights_std or self.args.get('weights_std')
        # for uniform weights
        interval = weights_interval or self.args.get('weights_interval')
        # for bias
        bias_init = bias_init or self.args.get('bias_init')
        # activation function!
        activation_name = activation or self.args.get('activation')
        # NOTE: `basestring` only exists in Python 2.
        if isinstance(activation_name, basestring):
            activation_func = get_activation_function(activation_name)
            # NOTE(review): the next two lines read like the non-string branch (an `else:` appears to be
            # missing); as written they also run for string names and replace the looked-up function.
            assert callable(activation_name)
            activation_func = activation_name

        # parameters - make sure to deal with params_hook! #
        if params_hook:
            # NOTE(review): the argument of len( is missing on the next line; the error message suggests
            # it was params_hook - confirm.
            assert len(
            ) == 2, "Expected 2 params (W and b) for BasicLayer, found {0!s}!".format(
                len(params_hook))  # make sure the params_hook has W and b
            W, b = params_hook
            # NOTE(review): everything from here to the get_bias call reads like the "no params_hook"
            # branch (an `else:` appears to be missing). The get_weights_* calls are truncated: only the
            # shape argument is visible, so how mean/std/interval are passed cannot be seen here.
            # if we are initializing weights from a gaussian
            if weights_init.lower() == 'gaussian':
                W = get_weights_gaussian(shape=(input_size, output_size),
            # if we are initializing weights from a uniform distribution
            # NOTE(review): this test reads self.args.get('weights_init') while the gaussian test above
            # uses the resolved local weights_init, so an explicit weights_init='uniform' argument is not
            # consulted here. The messages below also report the config value rather than the local one.
            elif self.args.get('weights_init').lower() == 'uniform':
                W = get_weights_uniform(shape=(input_size, output_size),
            # otherwise not implemented
                    "Did not recognize weights_init %s! Pleas try gaussian or uniform"
                    % str(self.args.get('weights_init')))
                raise NotImplementedError(
                    "Did not recognize weights_init %s! Pleas try gaussian or uniform"
                    % str(self.args.get('weights_init')))

            b = get_bias(shape=output_size, name="b", init_values=bias_init)

        # Finally have the two parameters!
        self.params = [W, b]

        # computation #
        # Here is the meat of the computation transforming input -> output
        self.output = activation_func(T.dot(self.input, W) + b)

        # NOTE(review): the call that consumes this message (presumably a log call) is not visible.
        # `%` binds tighter than the comma, so only str((input_size, output_size)) is applied to the
        # two-placeholder format string, which raises TypeError when evaluated; str(activation_name)
        # ends up as a separate argument. The two values probably need to be wrapped in one tuple.
            "Initialized a basic fully-connected layer with shape %s and activation: %s"
            % str((input_size, output_size)), str(activation_name))
    def __init__(self,
        # NOTE(review): the parameter list, closing `):` and docstring are not present here. The body
        # reads config, inputs_hook, hiddens_hook, params_hook, input_size, hidden_size,
        # corruption_level, hidden_activation, visible_activation and cost_function, so those are
        # presumably the parameters - confirm against the original.
        #
        # Purpose (from the code below): build a tied-weight denoising autoencoder graph - a training
        # cost computed on a corrupted input, and a noise-free reconstruction compiled into f_predict.
        # Now, initialize with Model class to combine config and defaults!
        # Here, defaults is defined via a dictionary. However, you could also
        # pass a filename to a JSON or YAML file with the same format.
        # NOTE(review): the remaining arguments of this call are cut off.
        super(DenoisingAutoencoder, self).__init__(config=config,
        # Any parameter from the 'config' will overwrite the 'defaults' dictionary.
        # These parameters are now accessible from the 'self.args' variable!

        # When accessing model parameters, it is best practice to try to find the parameters
        # explicitly passed first, and then go to the 'self.args' configuration.

        # Define model hyperparameters
        # deal with the inputs_hook and hiddens_hook for the size parameters!
        # if the hook exists, grab the size from the first element of the tuple.
        # NOTE(review): in both size lookups below the second assignment reads like the fallback
        # branch; the `else:` lines appear to be missing.
        if inputs_hook:
            input_size = inputs_hook[0]
        # otherwise, grab the size from the configurations.
            input_size = input_size or self.args.get('input_size')
        if hiddens_hook:
            hidden_size = hiddens_hook[0]
            hidden_size = hidden_size or self.args.get('hidden_size')

        # NOTE(review): the argument of self.args.get( is cut off here and in the two activation
        # lookups below; the config keys they read are not visible.
        corruption_level = corruption_level or self.args.get(

        # use the helper methods to grab appropriate activation functions from names!
        # NOTE: hidden_activation, visible_activation and cost_function are rebound from the incoming
        # value to the resolved callable.
        hidden_act_name = hidden_activation or self.args.get(
        hidden_activation = get_activation_function(hidden_act_name)
        visible_act_name = visible_activation or self.args.get(
        visible_activation = get_activation_function(visible_act_name)

        # do the same for the cost function
        cost_func_name = cost_function or self.args.get('cost_function')
        cost_function = get_cost_function(cost_func_name)

        # Now, define the symbolic input to the model (Theano)
        # We use a matrix rather than a vector so that minibatch processing can be done in parallel.
        # Make sure to deal with 'inputs_hook' if it exists!
        if inputs_hook:
            # grab the new input variable from the inputs_hook tuple
            x = inputs_hook[1]
            # NOTE(review): reads like the fallback branch (`else:` missing); as written the hooked
            # variable is replaced by a fresh fmatrix.
            x = T.fmatrix("X")
        self.inputs = [x]

        # Build the model's parameters - a weight matrix and two bias vectors
        # Make sure to deal with 'params_hook' if it exists!
        if params_hook:
            # check to see if it contains the three necessary variables
            assert len(params_hook
                       ) == 3, "Not correct number of params to DAE, needs 3!"
            W, b0, b1 = params_hook
            # NOTE(review): the three lines below read like the fallback branch (`else:` missing).
            # W is (input_size, hidden_size); b0 is the visible-side bias and b1 the hidden-side bias,
            # as seen from how they are added in the computations further down.
            W = get_weights_uniform(shape=(input_size, hidden_size), name="W")
            b0 = get_bias(shape=input_size, name="b0")
            b1 = get_bias(shape=hidden_size, name="b1")
        self.params = [W, b0, b1]

        # Perform the computation for a denoising autoencoder!
        # first, add noise (corrupt) the input
        # NOTE(review): the remaining arguments of salt_and_pepper( are cut off; presumably the
        # resolved corruption_level is passed - confirm.
        corrupted_input = salt_and_pepper(input=x,
        # next, compute the hidden layer given the inputs (the encoding function)
        # We don't need to worry about hiddens_hook during training, because we can't
        # compute a cost without having the input!
        # hiddens_hook is more for the predict function and linking methods below.
        hiddens = hidden_activation(T.dot(corrupted_input, W) + b1)
        # finally, create the reconstruction from the hidden layer (we tie the weights with W.T)
        reconstruction = visible_activation(T.dot(hiddens, W.T) + b0)
        # the training cost is reconstruction error
        self.train_cost = cost_function(output=reconstruction, target=x)

        # Compile everything into a Theano function for prediction!
        # When using real-world data in predictions, we wouldn't corrupt the input first.
        # Therefore, create another version of the hiddens and reconstruction without adding the noise.
        # Here is where we would handle hiddens_hook because this is a generative model!
        # For the predict function, it would take in the hiddens instead of the input variable x.
        if hiddens_hook:
            self.hiddens = hiddens_hook[1]
            # NOTE(review): reads like the fallback branch (`else:` missing); as written the hooked
            # hiddens are replaced by the encoding of x.
            self.hiddens = hidden_activation(T.dot(x, W) + b1)
        # make the reconstruction (generated) from the hiddens
        self.recon_predict = visible_activation(T.dot(self.hiddens, W.T) + b0)
        # now compile the predict function accordingly - if it used x or hiddens as the input.
        if hiddens_hook:
            # NOTE(review): this call is cut off after `inputs=`; the line after it reads like the
            # fallback branch that compiles the predictor from x instead.
            self.f_predict = function(inputs=[self.hiddens],
            self.f_predict = function(inputs=[x], outputs=self.recon_predict)
    def __init__(self,
        # NOTE(review): the parameter list and the docstring delimiters are not present in the file at
        # this point; the text below is the body of the docstring. It documents the parameters but
        # local_response_normalization, which the code uses, is not described.
        Initialize a convpool layer.

        inputs_hook : Tuple of (shape, variable)
            Routing information for the model to accept inputs from elsewhere. This is used for linking
            different models together. For now, it needs to include the shape information.
        params_hook : List(theano shared variable)
            A list of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables).
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        input_size : tuple
            Shape of the incoming data: (batch_size, num_channels, input_height, input_width).
        filter_shape : tuple
            (num_filters, num_channels, filter_height, filter_width). This is also the shape of the weights matrix.
        convstride : int
            The distance between the receptive field centers of neighboring units. This is the 'subsample' of theano's
            convolution operation.
        padsize : int
            This is the border_mode for theano's convolution operation.
        group : int
            Not yet supported, used for multi-gpu implementation.
            .. todo:: support multi-gpu
        poolsize : int
            How much to downsample the output.
        poolstride : int
            The stride width for downsampling the output.
        weights_init : str
            Determines the method for initializing model weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        activation : str or Callable
            The activation function to apply to the layer. See opendeep.utils.activation for options.
        convolution : str or Callable
            The 2-dimensional convolution implementation to use. The default of 'conv2d' is normally fine because it
            uses theano's tensor.nnet.conv.conv2d, which cherry-picks the best implementation with a meta-optimizer if
            you set the theano configuration flag 'optimizer_including=conv_meta'. Otherwise, you could pass a
            callable function, such as cudnn or cuda-convnet if you don't want to use the meta-optimizer.
        mrg : random
            A random number generator that is used when adding noise.
            I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.
        # Forward every local except self to the parent initializer.
        # NOTE(review): `arg is not 'self'` is an identity test against a string literal; it only works
        # through string interning and Python 3.8+ emits a SyntaxWarning for it. `arg != 'self'` is the
        # intended comparison. The visible code passes the dict positionally (no `**`) - confirm.
        # Presumably the parent stores these values on self (self.inputs_hook, self.params_hook and
        # self.input_size are read below without being assigned in this method) - confirm.
        super(ConvPoolLayer, self).__init__(
            {arg: val
             for (arg, val) in locals().items() if arg is not 'self'})

        # deal with the inputs coming from inputs_hook - necessary for now to give an input hook
        # inputs_hook is a tuple of (Shape, Input)
        if self.inputs_hook:
            # NOTE(review): the argument of len( is missing on the next line, and the ftensor4
            # assignment reads like the fallback branch (`else:` missing).
            assert len(
            ) == 2, "expecting inputs_hook to be tuple of (shape, input)"
            self.input = inputs_hook[1]
            self.input = T.ftensor4("X")

        self.group = group

        # layer configuration #
        # activation function!
        self.activation_func = get_activation_function(activation)

        # convolution function!
        if convolution == 'conv2d':
            # using the theano flag optimizer_including=conv_meta will let this conv function optimize itself.
            self.convolution_func = T.nnet.conv2d
            # NOTE(review): the argument of callable( is missing, and these lines read like the
            # fallback branch that accepts a user-supplied callable (`else:` missing).
            assert callable(
            ), "Input convolution was not 'conv2d' and was not Callable."
            self.convolution_func = convolution

        # expect image_shape to be bc01!
        self.channel = self.input_size[1]

        self.convstride = convstride
        self.padsize = padsize

        self.poolstride = poolstride
        self.poolsize = poolsize

        # if lib_conv is cudnn, it works only on square images and the grad works only when channel % 16 == 0

        assert self.group in [
            1, 2
        ], "group argument needs to be 1 or 2 (1 for default conv2d)"

        # NOTE: numpy.asarray returns the same object when it is given an ndarray, so the element
        # assignments further down would also modify an array passed in by the caller.
        filter_shape = numpy.asarray(filter_shape)
        self.input_size = numpy.asarray(self.input_size)

        if local_response_normalization:
            lrn_func = cross_channel_normalization_bc01
            # NOTE(review): reads like the fallback branch (`else:` missing); as written lrn_func is
            # always None and the normalization near the end of this method never runs.
            lrn_func = None

        # Params - make sure to deal with params_hook! #
        if self.group == 1:
            if self.params_hook:
                # make sure the params_hook has W and b
                assert len(self.params_hook) == 2, \
                    "Expected 2 params (W and b) for ConvPoolLayer, found {0!s}!".format(len(self.params_hook))
                self.W, self.b = self.params_hook
                # NOTE(review): the get_weights( and get_bias( calls below are cut off and read like
                # the "no params_hook" branch (`else:` missing).
                self.W = get_weights(
                    # if gaussian
                    # if uniform

                self.b = get_bias(shape=filter_shape[0],

            self.params = [self.W, self.b]

            # NOTE(review): from here to the second self.params assignment the code reads like the
            # group == 2 branch (`else:` missing). It halves the first two entries of filter_shape and
            # of self.input_size in place and creates two weight/bias pairs. `/` is integer division
            # only under Python 2; under Python 3 the float result is cast back on assignment,
            # presumably into an integer array - confirm the dtype.
            filter_shape[0] = filter_shape[0] / 2
            filter_shape[1] = filter_shape[1] / 2

            self.input_size[0] = self.input_size[0] / 2
            self.input_size[1] = self.input_size[1] / 2
            if self.params_hook:
                assert len(self.params_hook
                           ) == 4, "expected params_hook to have 4 params"
                # NOTE(review): the hook is unpacked as (W0, W1, b0, b1) but self.params below is
                # ordered [W0, b0, W1, b1]; confirm which order callers are expected to supply.
                self.W0, self.W1, self.b0, self.b1 = self.params_hook
                # NOTE(review): this path always uses get_weights_gaussian, unlike the single-group
                # path which calls get_weights; the get_bias( calls are cut off.
                self.W0 = get_weights_gaussian(shape=filter_shape, name="W0")
                self.W1 = get_weights_gaussian(shape=filter_shape, name="W1")
                self.b0 = get_bias(shape=filter_shape[0],
                self.b1 = get_bias(shape=filter_shape[0],
            self.params = [self.W0, self.b0, self.W1, self.b1]

        # build appropriate graph for conv. version #
        self.output = self._build_computation_graph()

        # Local Response Normalization (for AlexNet)
        if local_response_normalization and lrn_func is not None:
            self.output = lrn_func(self.output)

        # NOTE(review): the arguments of this log call are cut off.
        log.debug("convpool layer initialized with shape_in: %s",
    def __init__(self, inputs_hook=None, hiddens_hook=None, params_hook=None, outdir='outputs/rbm/',
                 input_size=None, hidden_size=None,
                 visible_activation='sigmoid', hidden_activation='sigmoid',
                 weights_init='uniform', weights_mean=0, weights_std=5e-3, weights_interval='montreal',
                 k=15, persistent=True):
        # NOTE(review): the docstring delimiters are not present in the file; the text below is the
        # docstring body. It documents `bias_init` and `mrg`, and the code uses both, but neither
        # appears in the signature above - a signature line appears to be missing. `persistent` is in
        # the signature but is neither documented nor read in this method.
        RBM constructor. Defines the parameters of the model along with
        basic operations for inferring hidden from visible (and vice-versa),
        as well as for performing CD updates.

        inputs_hook : Tuple of (shape, variable)
            Routing information for the model to accept inputs from elsewhere. This is used for linking
            different models together. For now, it needs to include the shape information (normally the
            dimensionality of the input i.e. input_size).
        hiddens_hook : Tuple of (shape, variable)
            Routing information for the model to accept its hidden representation from elsewhere.
            This is used for linking different models together. For now, it needs to include the shape
            information (normally the dimensionality of the hiddens i.e. hidden_size).
        params_hook : List(theano shared variable)
            A list of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables).
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        input_size : int
            The size (dimensionality) of the input to the RBM. If shape is provided in `inputs_hook`, this is optional.
            The :class:`Model` requires an `output_size`, which gets set to this value because the RBM is an
            unsupervised model. The output is a reconstruction of the input.
        hidden_size : int
            The size (dimensionality) of the hidden layer for the RBM.
        visible_activation : str or callable
            The nonlinear (or linear) visible activation to perform after the dot product from hiddens -> visible layer.
            This activation function should be appropriate for the input unit types, i.e. 'sigmoid' for binary inputs.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        hidden_activation : str or callable
            The nonlinear (or linear) hidden activation to perform after the dot product from visible -> hiddens layer.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        weights_init : str
            Determines the method for initializing model weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        mrg : random
            A random number generator that is used when sampling. The RBM is a probabilistic model, so it relies a lot
            on sampling. I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.
        k : int
            The k number of steps used for CD-k or PCD-k with Gibbs sampling. Basically, the number of samples
            generated from the model to train against reconstructing the original input.
        # init Model to combine the defaults and config dictionaries with the initial parameters.
        # NOTE(review): `arg is not 'self'` is an identity test against a string literal; it only works
        # through string interning and Python 3.8+ emits a SyntaxWarning for it. `arg != 'self'` is the
        # intended comparison. Presumably the parent stores input_size on self (self.input_size is read
        # below without being assigned in this method) - confirm.
        super(RBM, self).__init__(**{arg: val for (arg, val) in locals().items() if arg is not 'self'})

        # specifications #
        # grab info from the inputs_hook, hiddens_hook, or from parameters
        if inputs_hook is not None:  # inputs_hook is a tuple of (Shape, Input)
            assert len(inputs_hook) == 2, 'Expected inputs_hook to be tuple!'  # make sure inputs_hook is a tuple
            self.input = inputs_hook[1]
            # NOTE(review): the T.matrix assignment reads like the fallback branch (`else:` missing); as
            # written it replaces the hooked variable.
            # make the input a symbolic matrix
            self.input = T.matrix('V')

        # either grab the hidden's desired size from the parameter directly, or copy n_in
        hidden_size = hidden_size or self.input_size

        # get the number of steps k
        self.k = k

        # deal with hiddens_hook
        if hiddens_hook is not None:
            # make sure hiddens_hook is a tuple
            assert len(hiddens_hook) == 2, 'Expected hiddens_hook to be tuple!'
            hidden_size = hiddens_hook[0] or hidden_size
            self.hiddens_init = hiddens_hook[1]
            # NOTE(review): reads like the fallback branch (`else:` missing); as written hiddens_init is
            # always reset to None.
            self.hiddens_init = None

        # other specifications
        # visible activation function!
        self.visible_activation_func = get_activation_function(visible_activation)

        # make sure the sampling functions are appropriate for the activation functions.
        # NOTE(review): in both sampling checks below the log.error/raise pair reads like the
        # non-binary branch (`else:` missing); as written the raise follows the binomial assignment
        # unconditionally.
        if is_binary(self.visible_activation_func):
            self.visible_sampling = mrg.binomial
            # TODO: implement non-binary activation
            log.error("Non-binary visible activation not supported yet!")
            raise NotImplementedError("Non-binary visible activation not supported yet!")

        # hidden activation function!
        self.hidden_activation_func = get_activation_function(hidden_activation)

        # make sure the sampling functions are appropriate for the activation functions.
        if is_binary(self.hidden_activation_func):
            self.hidden_sampling = mrg.binomial
            # TODO: implement non-binary activation
            log.error("Non-binary hidden activation not supported yet!")
            raise NotImplementedError("Non-binary hidden activation not supported yet!")

        # parameters - make sure to deal with params_hook! #
        if params_hook is not None:
            # make sure the params_hook has W (weights matrix) and bh, bv (bias vectors)
            assert len(params_hook) == 3, \
                "Expected 3 params (W, bv, bh) for RBM, found {0!s}!".format(len(params_hook))
            # doesn't matter if bv and bh are vectors or matrices.
            self.W, self.bv, self.bh = params_hook
            # Take hidden_size from the second dimension of the supplied W; .eval() presumably turns
            # the symbolic shape entry into a concrete number - confirm.
            hidden_size = self.W.shape[1].eval()
            # NOTE(review): the get_weights( call is cut off, and it and the get_bias calls below read
            # like the "no params_hook" branch (`else:` missing).
            self.W = get_weights(weights_init=weights_init,
                                 shape=(self.input_size, hidden_size),
                                 # if gaussian
                                 # if uniform

            # grab the bias vectors
            self.bv = get_bias(shape=self.input_size, name="bv", init_values=bias_init)
            self.bh = get_bias(shape=hidden_size, name="bh", init_values=bias_init)

        # Finally have the parameters
        self.params = [self.W, self.bv, self.bh]

        # Create the RBM graph!
        self.cost, self.monitors, self.updates, self.v_sample, self.h_sample = self._build_rbm()

        log.debug("Initialized an RBM shape %s",
                  str((self.input_size, hidden_size)))
    def __init__(self,
        # NOTE(review): the parameter list, closing `):` and docstring are not present here. The body
        # reads config, defaults, inputs_hook, params_hook, input_shape, filter_shape, stride,
        # border_mode, convolution, activation, weights_init, weights_interval, weights_mean and
        # weights_std, so those are presumably the parameters - confirm against the original.
        #
        # Purpose (from the code below): build a 1-D convolution layer whose output is
        # activation_func(convolution(input, ...) + b), with the bias broadcast over the first and
        # last axes.
        super(Conv1D, self).__init__(config=config, defaults=defaults)
        # configs can now be accessed through self.args

        # specifications #
        # grab info from the inputs_hook, or from parameters
        # expect input to be in the form (B, C, I) (batch, channel, input data)
        #  inputs_hook is a tuple of (Shape, Input)
        if inputs_hook:
            # make sure inputs_hook is a tuple
            # NOTE(review): the argument of len( is missing, and the self.args.get( call two lines
            # below is cut off.
            assert len(
            ) == 2, "expecting inputs_hook to be tuple of (shape, input)"
            input_shape = inputs_hook[0] or input_shape or self.args.get(
            self.input = inputs_hook[1]
            # NOTE(review): the lines below read like the "no inputs_hook" branch (`else:` missing); as
            # written the hooked variable is replaced by a fresh ftensor3.
            # either grab from the parameter directly or self.args config
            input_shape = input_shape or self.args.get('input_shape')
            # make the input a symbolic matrix
            self.input = T.ftensor3('X')

        # activation function!
        activation_name = activation or self.args.get('activation')
        # NOTE: `basestring` only exists in Python 2.
        if isinstance(activation_name, basestring):
            activation_func = get_activation_function(activation_name)
            # NOTE(review): reads like the non-string branch (`else:` missing); the argument of
            # callable( is also missing, and the check comes after the assignment it guards.
            activation_func = activation_name
            assert callable(
            ), "Activation function either needs to be a string name or callable!"

        # filter shape should be in the form (num_filters, num_channels, filter_length)
        filter_shape = filter_shape or self.args.get('filter_shape')
        num_filters = filter_shape[0]
        filter_length = filter_shape[2]
        # NOTE: the `x or self.args.get(...)` pattern treats any falsy explicit argument (for example
        # 0 or 0.0) as "not given" and falls back to the configured value.
        stride = stride or self.args.get('stride')
        border_mode = border_mode or self.args.get('border_mode')
        # NOTE(review): `convolution` is called directly further down, so it must be callable by then;
        # no string-to-function lookup is visible in this method - confirm what the config holds.
        convolution = convolution or self.args.get('convolution')

        weights_init = weights_init or self.args.get('weights_init')
        # NOTE(review): the argument of self.args.get( is cut off on the next line.
        weights_interval = weights_interval or self.args.get(
        weights_mean = weights_mean or self.args.get('weights_mean')
        weights_std = weights_std or self.args.get('weights_std')

        # Params - make sure to deal with params_hook! #
        if params_hook:
            # make sure the params_hook has W and b
            # NOTE(review): the argument of len( and the argument of .format( are missing.
            assert len(
            ) == 2, "Expected 2 params (W and b) for Conv1D, found {0!s}!".format(
            W, b = params_hook
            # NOTE(review): everything from here to the get_bias( call reads like the "no params_hook"
            # branch (`else:` missing); the get_weights_* and get_bias calls are cut off.
            # if we are initializing weights from a gaussian
            if weights_init.lower() == 'gaussian':
                W = get_weights_gaussian(shape=filter_shape,
            # if we are initializing weights from a uniform distribution
            # NOTE(review): this test reads self.args.get('weights_init') while the gaussian test above
            # uses the resolved local weights_init, so an explicit weights_init='uniform' argument is not
            # consulted here. The messages below also report the config value rather than the local one.
            elif self.args.get('weights_init').lower() == 'uniform':
                W = get_weights_uniform(shape=filter_shape,
            # otherwise not implemented
                    "Did not recognize weights_init %s! Pleas try gaussian or uniform"
                    % str(self.args.get('weights_init')))
                raise NotImplementedError(
                    "Did not recognize weights_init %s! Pleas try gaussian or uniform"
                    % str(self.args.get('weights_init')))

            b = get_bias(shape=(num_filters, ),

        # Finally have the two parameters!
        self.params = [W, b]

        # Computational Graph! #
        # NOTE(review): both convolution( calls are cut off; only the input and the subsample
        # argument are visible.
        if border_mode in ['valid', 'full']:
            conved = convolution(self.input,
                                 subsample=(stride, ),
        elif border_mode == 'same':
            conved = convolution(self.input,
                                 subsample=(stride, ),
            # Crop along the last axis: skip (filter_length - 1) // 2 leading positions and keep
            # input_shape[2] positions. Presumably the cut-off call above uses a 'full' convolution so
            # that the result matches the input length - confirm.
            shift = (filter_length - 1) // 2
            conved = conved[:, :, shift:input_shape[2] + shift]

            # NOTE(review): the error path below reads like the final `else:` branch (line missing).
            log.error("Invalid border mode: '%s'" % border_mode)
            raise RuntimeError("Invalid border mode: '%s'" % border_mode)

        # dimshuffle('x', 0, 'x') puts b on the middle axis with broadcastable axes on either side, so
        # it is added per filter across the first and last axes of conved.
        self.output = activation_func(conved + b.dimshuffle('x', 0, 'x'))
    def __init__(self,
                 strides=(1, 1),
        Initialize a 2-dimensional convolutional layer.

        inputs_hook : Tuple of (shape, variable)
            Routing information for the model to accept inputs from elsewhere. This is used for linking
            different models together. For now, it needs to include the shape information.
        params_hook : List(theano shared variable)
            A list of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables).
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        input_size : tuple
            Shape of the incoming data: (batch_size, num_channels, input_height, input_width).
            If input_size is None, it can be inferred. However, border_mode can't be 'same'.
        filter_shape : tuple
            (num_filters, num_channels, filter_height, filter_width). This is also the shape of the weights matrix.
        stride : int
            The distance between the receptive field centers of neighboring units. This is the 'stride' of the
            convolution operation.
        border_mode : str, one of 'valid', 'full', 'same'
            A string indicating the convolution border mode.
            If 'valid', the convolution is only computed where the input and the
            filter fully overlap.
            If 'full', the convolution is computed wherever the input and the
            filter overlap by at least one position.
            If 'same', the convolution is computed wherever the input and the
            filter overlap by at least half the filter size, when the filter size
            is odd. In practice, the input is zero-padded with half the filter size
            at the beginning and half at the end (or one less than half in the case
            of an even filter size). This results in an output length that is the
            same as the input length (for both odd and even filter sizes).
        weights_init : str
            Determines the method for initializing model weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        activation : str or Callable
            The activation function to apply to the layer. See opendeep.utils.activation for options.
        convolution : str or Callable
            The 2-dimensional convolution implementation to use. The default of 'conv2d' is normally fine because it
            uses theano's tensor.nnet.conv.conv2d, which cherry-picks the best implementation with a meta-optimizer if
            you set the theano configuration flag 'optimizer_including=conv_meta'. Otherwise, you could pass a
            callable function, such as cudnn or cuda-convnet if you don't want to use the meta-optimizer.
        mrg : random
            A random number generator that is used when adding noise.
            I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.

        Theano's default convolution function (`theano.tensor.nnet.conv.conv2d`)
        does not support the 'same' border mode by default. This layer emulates
        it by performing a 'full' convolution and then cropping the result, which
        may negatively affect performance.
        super(Conv2D, self).__init__(
            {arg: val
             for (arg, val) in locals().items() if arg is not 'self'})

        # specifications #
        # grab info from the inputs_hook, or from parameters
        # expect input to be in the form (B, C, 0, 1) (batch, channel, rows, cols)
        # inputs_hook is a tuple of (Shape, Input)
        if self.inputs_hook:
            # make sure inputs_hook is a tuple
            assert len(
            ) == 2, "expecting inputs_hook to be tuple of (shape, input)"
            self.input = inputs_hook[1]
            # make the input a symbolic matrix
            self.input = T.ftensor4('X')

        # activation function!
        activation_func = get_activation_function(activation)

        # convolution function!
        if convolution == 'conv2d':
            # using the theano flag optimizer_including=conv_meta will let this conv function optimize itself.
            convolution_func = T.nnet.conv2d
            assert callable(
            ), "Input convolution was not 'conv2d' and was not Callable."
            convolution_func = convolution

        # filter shape should be in the form (num_filters, num_channels, filter_size[0], filter_size[1])
        num_filters = filter_shape[0]
        filter_size = filter_shape[2:3]

        # Params - make sure to deal with params_hook! #
        if self.params_hook:
            # make sure the params_hook has W and b
            assert len(self.params_hook) == 2, \
                "Expected 2 params (W and b) for Conv2D, found {0!s}!".format(len(self.params_hook))
            W, b = self.params_hook
            W = get_weights(
                # if gaussian
                # if uniform

            b = get_bias(shape=(num_filters, ),

        # Finally have the two parameters!
        self.params = [W, b]

        # Computational Graph! #
        if border_mode in ['valid', 'full']:
            conved = convolution_func(self.input,
        elif border_mode == 'same':
            assert self.input_size is not None, "input_size has to be specified for border_mode 'same'!"
            conved = convolution_func(self.input,
            shift_x = (filter_size[0] - 1) // 2
            shift_y = (filter_size[1] - 1) // 2
            conved = conved[:, :, shift_x:self.input_size[2] + shift_x,
                            shift_y:self.input_size[3] + shift_y]
            raise RuntimeError("Invalid border mode: '%s'" % border_mode)

        self.output = activation_func(conved + b.dimshuffle('x', 0, 'x', 'x'))
Example #21
File: basic.py Project: gujunli/OpenDeep
    def __init__(self,
        # NOTE(review): the remainder of the parameter list and the docstring delimiters are not present in
        # this excerpt. The prose below reads as the docstring describing the parameters - TODO confirm
        # names, order and defaults against the upstream source before relying on them.
        Initialize a basic layer.

        inputs : List of [tuple(shape, `Theano.TensorType`)]
            The dimensionality of the inputs for this model, and the routing information for the model
            to accept inputs from elsewhere. `shape` will be a monad tuple representing known
            sizes for each dimension in the `Theano.TensorType`. The length of `shape` should be equal to number of
            dimensions in `Theano.TensorType`, where the shape element is an integer representing the size for its
            dimension, or None if the shape isn't known. For example, if you have a matrix with unknown batch size
            but fixed feature size of 784, `shape` would be: (None, 784). The full form of `inputs` would be:
            [((None, 784), <TensorType(float32, matrix)>)].
        outputs : int
            The dimensionality of the output for this model.
        params : Dict(string_name: theano SharedVariable), optional
            A dictionary of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as siamese networks or pretraining some
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        activation : str or callable
            The activation function to use after the dot product going from input -> output. This can be a string
            representing an option from opendeep.utils.activation, or your own function as long as it is callable.
        weights_init : str
            Determines the method for initializing input -> output weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        mrg : random
            A random number generator that is used when adding noise.
            I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.
        # init Model to combine the defaults and config dictionaries with the initial parameters.
        # Every local name at this point (i.e. the constructor arguments) is forwarded as a keyword argument.
        initial_parameters = locals().copy()
        super(Dense, self).__init__(**initial_parameters)
        if self.inputs is None:
            # NOTE(review): the body of this check is missing from the excerpt - TODO restore from upstream.

        # specifications #
        # Only a single input expression is supported; anything more is rejected outright.
        if len(self.inputs) > 1:
            raise NotImplementedError(
                "Expected 1 input to Dense, found %d. Please merge inputs before passing "
                "to the Dense model!" % len(self.inputs))
        # self.inputs is a list of all the input expressions (we enforce only 1, so self.inputs[0] is the input)
        input_shape, self.input = self.inputs[0]
        if isinstance(input_shape, int):
            # A bare integer is treated as the size of the last dimension; every leading dimension is
            # recorded as None (unknown), one entry per remaining dimension of the input variable.
            self.input_size = ((None, ) *
                               (self.input.ndim - 1)) + (input_shape, )
            # NOTE(review): the next assignment presumably belonged to an `else:` branch that is missing
            # from this excerpt; as written it overwrites the tuple built just above.
            self.input_size = input_shape
        assert self.input_size is not None, "Need to specify the shape for the last dimension of the input!"

        # We also only have 1 output
        assert self.output_size is not None, "Need to specify outputs size!"
        out_size = self.output_size[0]
        if isinstance(out_size, int):
            # An integer output size replaces only the last dimension of the input shape.
            self.output_size = self.input_size[:-1] + (out_size, )
            # NOTE(review): presumably the body of a missing `else:` as well - as written it overwrites
            # the tuple computed on the previous line.
            self.output_size = out_size

        # activation function!
        activation_func = get_activation_function(activation)

        # parameters - make sure to deal with input dictionary! #
        # NOTE(review): both `self.params.get(` calls below are truncated in this excerpt (key, fallback
        # initializer arguments and closing parentheses are missing) - TODO restore from upstream.
        W = self.params.get(
                shape=(self.input_size[-1], self.output_size[-1]),
                # if gaussian
                # if uniform

        b = self.params.get(

        # Finally have the two parameters - weights matrix W and bias vector b. That is all!
        self.params = OrderedDict([("W", W), ("b", b)])

        # computation #
        # Here is the meat of the computation transforming input -> output
        # It simply involves a matrix multiplication of inputs*weights, adding the bias vector, and then passing
        # the result through our activation function (normally something nonlinear such as: max(0, output))
        self.output = activation_func(dot(self.input, W) + b)

        # NOTE(review): the two lines below look like the arguments of a logging call whose opening line
        # is missing from this excerpt - TODO confirm.
            "Initialized a basic fully-connected layer with shape %s and activation: %s",
            str((self.input_size[-1], self.output_size[-1])), str(activation))
Example #22
    def __init__(self,
        # NOTE(review): the remainder of the parameter list and the docstring delimiters are not present in
        # this excerpt. The prose below reads as the docstring describing the parameters - TODO confirm
        # names, order and defaults against the upstream source before relying on them.
        Initialize a 1-D convolutional layer.

        inputs_hook : Tuple of (shape, variable)
            Routing information for the model to accept inputs from elsewhere. This is used for linking
            different models together. For now, it needs to include the shape information.
        params_hook : List(theano shared variable)
            A list of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables).
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        input_size : tuple
            Shape of the incoming data: (batch_size, num_channels, data_dimensionality). Most likely, your channels
            will be 1. For example, batches of text will be of the form (N, 1, D) where N=examples in minibatch and
            D=dimensionality (chars, words, etc.)
        filter_shape : tuple
            (num_filters, num_channels, filter_length). This is also the shape of the weights matrix.
        stride : int
            The distance between the receptive field centers of neighboring units. This is the 'stride' of the
            convolution operation.
        border_mode : str, one of 'valid', 'full', 'same'
            A string indicating the convolution border mode.
            If 'valid', the convolution is only computed where the input and the
            filter fully overlap.
            If 'full', the convolution is computed wherever the input and the
            filter overlap by at least one position.
            If 'same', the convolution is computed wherever the input and the
            filter overlap by at least half the filter size, when the filter size
            is odd. In practice, the input is zero-padded with half the filter size
            at the beginning and half at the end (or one less than half in the case
            of an even filter size). This results in an output length that is the
            same as the input length (for both odd and even filter sizes).
        weights_init : str
            Determines the method for initializing model weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        activation : str or Callable
            The activation function to apply to the layer. See opendeep.utils.activation for options.
        convolution : str or Callable
            The 1-dimensional convolution implementation to use. The default of 'mc0' is normally fine. See
            opendeep.utils.conv1d_implementations for alternatives. (This is necessary because Theano only
            supports 2D convolutions at the moment).
        mrg : random
            A random number generator that is used when adding noise.
            I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.

        Theano's default convolution function (`theano.tensor.nnet.conv.conv2d`)
        does not support the 'same' border mode by default. This layer emulates
        it by performing a 'full' convolution and then cropping the result, which
        may negatively affect performance.
        # Forward every constructor argument except `self` to the parent initializer.
        # NOTE(review): `arg is not 'self'` compares object identity rather than string equality, so it
        # only filters `self` when the two strings happen to be the same object; `!=` is the reliable form.
        # NOTE(review): the dict is passed positionally here, whereas the other constructors in this file
        # unpack it with `**` - TODO confirm which form the parent initializer expects.
        super(Conv1D, self).__init__(
            {arg: val
             for (arg, val) in locals().items() if arg is not 'self'})

        # specifications #
        # grab info from the inputs_hook, or from parameters
        # expect input to be in the form (B, C, I) (batch, channel, input data)
        # inputs_hook is a tuple of (Shape, Input)
        if self.inputs_hook is not None:
            # make sure inputs_hook is a tuple
            # NOTE(review): the argument of `len(` is missing from this excerpt (presumably
            # `self.inputs_hook`) - TODO restore.
            assert len(
            ) == 2, "expecting inputs_hook to be tuple of (shape, input)"
            self.input = inputs_hook[1]
            # NOTE(review): the assignment below presumably belonged to an `else:` branch that is missing
            # from this excerpt; as written it replaces the hooked input with a fresh symbolic variable.
            # make the input a symbolic matrix
            self.input = T.ftensor3('X')

        # activation function!
        activation_func = get_activation_function(activation)

        # convolution function!
        convolution_func = get_conv1d_function(convolution)

        # filter shape should be in the form (num_filters, num_channels, filter_length)
        num_filters = filter_shape[0]
        filter_length = filter_shape[2]

        # Params - make sure to deal with params_hook! #
        if self.params_hook:
            # make sure the params_hook has W and b
            assert len(self.params_hook) == 2, \
                "Expected 2 params (W and b) for Conv1D, found {0!s}!".format(len(self.params_hook))
            W, b = self.params_hook
            # NOTE(review): the `get_weights(` and `get_bias(` calls below are truncated (arguments and
            # closing parentheses are missing) and presumably sat under a missing `else:`; as written they
            # would replace the hooked parameters - TODO restore from upstream.
            W = get_weights(
                # if gaussian
                # if uniform

            b = get_bias(shape=(num_filters, ),

        # Finally have the two parameters!
        self.params = [W, b]

        # Computational Graph! #
        # NOTE(review): both `convolution_func(` calls below are truncated in this excerpt (the filter
        # argument, border mode and closing parenthesis are missing) - TODO restore from upstream.
        if border_mode in ['valid', 'full']:
            conved = convolution_func(self.input,
                                      subsample=(stride, ),
        elif border_mode == 'same':
            conved = convolution_func(self.input,
                                      subsample=(stride, ),
            # Crop the convolution result to a window of self.input_size[2] elements, starting
            # (filter_length - 1) // 2 positions in.
            shift = (filter_length - 1) // 2
            conved = conved[:, :, shift:self.input_size[2] + shift]

            # NOTE(review): the error path below presumably belonged to a missing `else:` branch; as
            # written it would raise for every 'same' convolution.
            log.error("Invalid border mode: '%s'" % border_mode)
            raise RuntimeError("Invalid border mode: '%s'" % border_mode)

        # Add the bias along axis 1 (broadcast over the other two axes), then apply the activation.
        self.output = activation_func(conved + b.dimshuffle('x', 0, 'x'))
Example #23
    def __init__(self, inputs_hook, input_shape=None, filter_shape=None, convstride=None, padsize=None, group=None,
                 poolsize=None, poolstride=None, bias_init=None, local_response_normalization=None,
                 convolution=None, activation=None, params_hook=None, config=None, defaults=defaults):
        """
        Initialize a combined convolution + pooling layer.

        All arguments are forwarded to the parent initializer, which is expected to merge them with
        `config` and `defaults` and expose the result as attributes on `self`.

        Parameters
        ----------
        inputs_hook : tuple of (shape, variable)
            Routing information for the input. When given, a non-empty shape overrides `input_shape` and
            the variable becomes the layer input. Otherwise a new symbolic `ftensor4` named "X" is created.
        input_shape : tuple
            Shape of the incoming data, expected in bc01 order; index 1 is read as the channel count.
        filter_shape : tuple
            Shape of the filters, also used as the shape of the weights. Index 0 is the bias length.
        convstride, padsize, poolsize, poolstride, convolution
            Not read in this constructor; presumably consumed by `build_computation_graph` - TODO confirm.
        group : int
            Must be 1 or 2. With 1 a single (W, b) pair is used. With 2 the first two entries of both
            `filter_shape` and `input_shape` are halved and two pairs (W0, b0) and (W1, b1) are used.
        bias_init : float
            Initial value handed to `get_bias` for newly created biases.
        local_response_normalization : bool
            When truthy, `cross_channel_normalization_bc01` is applied to the layer output.
        activation : str or callable
            Name of an activation function to look up, or a callable to use directly.
        params_hook : list of shared variables
            Existing parameters to reuse instead of creating new ones: [W, b] for group 1 and
            [W0, W1, b0, b1] for group 2.
        config, defaults
            Forwarded to the parent initializer. `weights_init`, `weights_mean`, `weights_std` and
            `weights_interval` are read from `self`, so they presumably have to come from here.

        Raises
        ------
        AssertionError
            If a hook has the wrong length, `group` is not 1 or 2, or `activation` is neither a string
            nor callable.
        NotImplementedError
            If new weights are needed for group 1 and `weights_init` is not 'gaussian' or 'uniform'.
        """
        # combine everything by passing to Model's init
        # (`!=` rather than `is not`: string identity is an implementation detail; `items()` works on
        # both Python 2 and 3, unlike `iteritems()`)
        init_args = {arg: val for (arg, val) in locals().items() if arg != 'self'}
        super(ConvPoolLayer, self).__init__(**init_args)
        # configs can now be accessed through self!

        # deal with the inputs coming from inputs_hook - necessary for now to give an input hook
        # inputs_hook is a tuple of (Shape, Input)
        if self.inputs_hook:
            assert len(self.inputs_hook) == 2, "expecting inputs_hook to be tuple of (shape, input)"
            self.input_shape = self.inputs_hook[0] or self.input_shape
            self.input = self.inputs_hook[1]
        else:
            # no hook: the layer owns its own symbolic input
            self.input = T.ftensor4("X")

        # layer configuration #
        # activation function!
        try:
            string_types = basestring  # Python 2
        except NameError:
            string_types = str  # Python 3
        # if a string name was given, look up the correct function from our utils.
        if isinstance(self.activation, string_types):
            self.activation_func = get_activation_function(self.activation)
        # otherwise, if a 'callable' was passed (i.e. custom function), use that directly.
        else:
            assert callable(self.activation), "Activation function either needs to be a string name or callable!"
            self.activation_func = self.activation

        # expect image_shape to be bc01!
        self.channel = self.input_shape[1]

        # shortening a word
        self.lrn = self.local_response_normalization

        # if lib_conv is cudnn, it works only on square images and the grad works only when channel % 16 == 0

        assert self.group in [1, 2], "group argument needs to be 1 or 2 (1 for default conv2d)"

        # arrays (rather than tuples) so individual entries can be halved in place for group == 2
        self.filter_shape = numpy.asarray(self.filter_shape)
        self.input_shape = numpy.asarray(self.input_shape)

        if self.lrn:
            self.lrn_func = cross_channel_normalization_bc01

        # Params - make sure to deal with params_hook! #
        if self.group == 1:
            if self.params_hook:
                # make sure the params_hook has W and b
                assert len(self.params_hook) == 2, \
                    "Expected 2 params (W and b) for ConvPoolLayer, found {0!s}!".format(len(self.params_hook))
                self.W, self.b = self.params_hook
            else:
                # if we are initializing weights from a gaussian
                if self.weights_init.lower() == 'gaussian':
                    self.W = get_weights_gaussian(
                        shape=self.filter_shape, mean=self.weights_mean, std=self.weights_std, name="W"
                    )
                # if we are initializing weights from a uniform distribution
                elif self.weights_init.lower() == 'uniform':
                    self.W = get_weights_uniform(shape=self.filter_shape, interval=self.weights_interval, name="W")
                # otherwise not implemented
                else:
                    log.error("Did not recognize weights_init %s! Pleas try gaussian or uniform" %
                              str(self.weights_init))
                    raise NotImplementedError(
                        "Did not recognize weights_init %s! Pleas try gaussian or uniform" %
                        str(self.weights_init))

                self.b = get_bias(shape=self.filter_shape[0], init_values=self.bias_init, name="b")
            self.params = [self.W, self.b]

        else:
            # group == 2: halve the first two entries of each shape.
            # Floor division keeps the entries integral on Python 3 as well as Python 2.
            self.filter_shape[0] = self.filter_shape[0] // 2
            self.filter_shape[1] = self.filter_shape[1] // 2

            self.input_shape[0] = self.input_shape[0] // 2
            self.input_shape[1] = self.input_shape[1] // 2
            if self.params_hook:
                assert len(self.params_hook) == 4, "expected params_hook to have 4 params"
                self.W0, self.W1, self.b0, self.b1 = self.params_hook
            else:
                self.W0 = get_weights_gaussian(shape=self.filter_shape, name="W0")
                self.W1 = get_weights_gaussian(shape=self.filter_shape, name="W1")
                self.b0 = get_bias(shape=self.filter_shape[0], init_values=self.bias_init, name="b0")
                self.b1 = get_bias(shape=self.filter_shape[0], init_values=self.bias_init, name="b1")
            self.params = [self.W0, self.b0, self.W1, self.b1]

        # build appropriate graph for conv. version #
        self.output = self.build_computation_graph()

        # Local Response Normalization (for AlexNet)
        if self.lrn:
            self.output = self.lrn_func(self.output)

        log.debug("convpool layer initialized with shape_in: %s", str(self.input_shape))
Example #24
    def __init__(self,
        # NOTE(review): the remainder of the parameter list and the docstring delimiters are not present in
        # this excerpt. The prose below reads as the docstring describing the parameters - TODO confirm
        # names, order and defaults against the upstream source before relying on them.
        Initialize a GSN.

        inputs_hook : Tuple of (shape, variable)
            Routing information for the model to accept inputs from elsewhere. This is used for linking
            different models together (e.g. setting the Softmax model's input layer to the DAE's hidden layer gives a
            newly supervised classification model). For now, it needs to include the shape information (normally the
            dimensionality of the input i.e. n_in).
        hiddens_hook : Tuple of (shape, variable)
            Routing information for the model to accept its hidden representation from elsewhere.
            This is used for linking different models together (e.g. setting the DAE model's hidden layers to the RNN's
            output layer gives a generative recurrent model.) For now, it needs to include the shape
            information (normally the dimensionality of the hiddens i.e. n_hidden).
        params_hook : List(theano shared variable)
            A list of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as a training model with dropout applied
            to layers and one without for testing, where the parameters are shared between the two.
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        input_size : int
            The size (dimensionality) of the input to the DAE. If shape is provided in `inputs_hook`, this is optional.
            The :class:`Model` requires an `output_size`, which gets set to this value because the DAE is an
            unsupervised model. The output is a reconstruction of the input.
        hidden_size : int
            The size (dimensionality) of the hidden layer for the DAE. Generally, you want it to be larger than
            `input_size`, which is known as *overcomplete*.
        visible_activation : str or callable
            The nonlinear (or linear) visible activation to perform after the dot product from hiddens -> visible layer.
            This activation function should be appropriate for the input unit types, i.e. 'sigmoid' for binary inputs.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        hidden_activation : str or callable
            The nonlinear (or linear) hidden activation to perform after the dot product from visible -> hiddens layer.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        layers : int
            The number of hidden layers to use.
        walkbacks : int
            The number of walkbacks to perform (the variable K in Bengio's paper above). A walkback is a Gibbs sample
            from the DAE, which means the model generates inputs in sequence, where each generated input is compared
            to the original input to create the reconstruction cost for training. For running the model, the very last
            generated input in the Gibbs chain is used as the output.
        input_sampling : bool
            During walkbacks, whether to sample from the generated input to create a new starting point for the next
            walkback (next step in the Gibbs chain). This generally makes walkbacks more effective by making the
            process more stochastic - more likely to find spurious modes in the model's representation.
        mrg : random
            A random number generator that is used when adding noise into the network and for sampling from the input.
            I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.
        tied_weights : bool
            DAE has two weight matrices - W from input -> hiddens and V from hiddens -> input. This boolean
            determines if V = W.T, which 'ties' V to W and reduces the number of parameters necessary during training.
        weights_init : str
            Determines the method for initializing model weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        cost_function : str or callable
            The function to use when calculating the reconstruction cost of the model. This should be appropriate
            for the type of input, i.e. use 'binary_crossentropy' for binary inputs, or 'mse' for real-valued inputs.
            See opendeep.utils.cost for options. You can also specify your own function, which needs to be callable.
        cost_args : dict
            Any additional named keyword arguments to pass to the specified `cost_function`.
        add_noise : bool
            Whether to add noise (corrupt) the input before passing it through the computation graph during training.
            This should most likely be set to the default of True, because this is a *denoising* autoencoder after all.
        noiseless_h1 : bool
            Whether to not add noise (corrupt) the hidden layer during computation.
        hidden_noise : str
            What type of noise to use for corrupting the hidden layer (if not `noiseless_h1`). See opendeep.utils.noise
            for options. This should be appropriate for the hidden unit activation, i.e. Gaussian for tanh or other
            real-valued activations, etc.
        hidden_noise_level : float
            The amount of noise to use for the noise function specified by `hidden_noise`. This could be the
            standard deviation for gaussian noise, the interval for uniform noise, the dropout amount, etc.
        input_noise : str
            What type of noise to use for corrupting the input before computation (if `add_noise`).
            See opendeep.utils.noise for options. This should be appropriate for the input units, i.e. salt-and-pepper
            for binary units, etc.
        input_noise_level : float
            The amount of noise used to corrupt the input. This could be the masking probability for salt-and-pepper,
            standard deviation for Gaussian, interval for Uniform, etc.
        noise_decay : str or False
            Whether to use `input_noise` scheduling (decay `input_noise_level` during the course of training),
            and if so, the string input specifies what type of decay to use. See opendeep.utils.decay for options.
            Noise decay (known as noise scheduling) effectively helps the DAE learn larger variance features first,
            and then smaller ones later (almost as a kind of curriculum learning). May help it converge faster.
        noise_annealing : float
            The amount to reduce the `input_noise_level` after each training epoch based on the decay function specified
            in `noise_decay`.
        image_width : int
            If the input should be represented as an image, the width of the input image. If not specified, it will be
            close to the square factor of the `input_size`.
        image_height : int
            If the input should be represented as an image, the height of the input image. If not specified, it will be
            close to the square factor of the `input_size`.
        # init Model to combine the defaults and config dictionaries with the initial parameters.
        # Every local name at this point (i.e. the constructor arguments) is forwarded as a keyword argument.
        initial_parameters = locals().copy()
        super(GSN, self).__init__(**initial_parameters)

        # when the input should be thought of as an image, either use the specified width and height,
        # or try to make as square as possible.
        if image_height is None and image_width is None:
            (_h, _w) = closest_to_square_factors(self.input_size)
            self.image_width = _w
            self.image_height = _h
            # NOTE(review): the two assignments below presumably belonged to an `else:` branch that is
            # missing from this excerpt; as written they reset both dimensions to None.
            self.image_height = image_height
            self.image_width = image_width

        # Theano variables and RNG #
        if self.inputs_hook is None:
            self.X = T.matrix('X')
            # NOTE(review): presumably a missing `else:` here too; as written this line indexes into
            # `self.inputs_hook` right after it was found to be None.
            # inputs_hook is a (shape, input) tuple
            self.X = self.inputs_hook[1]

        # Network specifications #
        # generally, walkbacks should be at least 2*layers
        # NOTE(review): the calls that wrapped the two message fragments below (presumably log warnings)
        # and the `else:` separating the even/odd cases are missing from this excerpt - TODO restore.
        if layers % 2 == 0:
            if walkbacks < 2 * layers:
                    'Not enough walkbacks for the layers! Layers is %s and walkbacks is %s. '
                    'Generaly want 2X walkbacks to layers', str(layers),
            if walkbacks < 2 * layers - 1:
                    'Not enough walkbacks for the layers! Layers is %s and walkbacks is %s. '
                    'Generaly want 2X walkbacks to layers', str(layers),

        self.add_noise = add_noise
        self.noise_annealing = as_floatX(
            noise_annealing)  # noise schedule parameter
        # NOTE(review): the four calls below are truncated in this excerpt (remaining arguments and closing
        # parentheses are missing) - TODO restore from upstream.
        self.hidden_noise_level = sharedX(hidden_noise_level,
        self.hidden_noise = get_noise(name=hidden_noise,
        self.input_noise_level = sharedX(input_noise_level,
        self.input_noise = get_noise(name=input_noise,

        self.walkbacks = walkbacks
        self.tied_weights = tied_weights
        self.layers = layers
        self.noiseless_h1 = noiseless_h1
        self.input_sampling = input_sampling
        self.noise_decay = noise_decay

        # if there was a hiddens_hook, unpack the hidden layers in the tensor
        if self.hiddens_hook is not None:
            # the hook's shape entry replaces the `hidden_size` argument
            hidden_size = self.hiddens_hook[0]
            self.hiddens_flag = True
            # NOTE(review): presumably the body of a missing `else:`; as written the flag is always False.
            self.hiddens_flag = False

        # determine the sizes of each layer in a list.
        #  layer sizes, from h0 to hK (h0 is the visible layer)
        hidden_size = list(raise_to_list(hidden_size))
        if len(hidden_size) == 1:
            # a single size is repeated for every hidden layer
            self.layer_sizes = [self.input_size] + hidden_size * self.layers
            # NOTE(review): the assert and assignment below presumably belonged to a missing `else:`
            # branch (one explicit size per layer); as written the assert runs on the single-size path.
            assert len(hidden_size) == self.layers, "Hiddens sizes and number of hidden layers mismatch." + \
                                                    "Hiddens %d and layers %d" % (len(hidden_size), self.layers)
            self.layer_sizes = [self.input_size] + hidden_size

        if self.hiddens_hook is not None:
            self.hiddens = self.unpack_hiddens(self.hiddens_hook[1])

        # Activation functions! #
        # hidden unit activation
        self.hidden_activation = get_activation_function(hidden_activation)
        # Visible layer activation
        self.visible_activation = get_activation_function(visible_activation)
        # make sure the sampling functions are appropriate for the activation functions.
        if is_binary(self.visible_activation):
            self.visible_sampling = mrg.binomial
            # NOTE(review): the error path below presumably belonged to a missing `else:` branch; as
            # written it raises even when the visible activation is binary.
            # TODO: implement non-binary activation
            log.error("Non-binary visible activation not supported yet!")
            raise NotImplementedError(
                "Non-binary visible activation not supported yet!")

        # Cost function
        self.cost_function = get_cost_function(cost_function)
        self.cost_args = cost_args or dict()

        # Parameters! #
        # make sure to deal with params_hook!
        # The hooked list is sliced positionally: all weight matrices first, then the biases.
        if self.params_hook is not None:
            # if tied weights, expect layers*2 + 1 params
            if self.tied_weights:
                assert len(self.params_hook) == 2*layers + 1, \
                    "Tied weights: expected {0!s} params, found {1!s}!".format(2*layers+1, len(self.params_hook))
                self.weights_list = self.params_hook[:layers]
                self.bias_list = self.params_hook[layers:]
            # NOTE(review): the `else:` introducing the untied case is missing from this excerpt; as
            # written the untied assert follows the tied one in the same branch.
            # if untied weights, expect layers*3 + 1 params
                assert len(self.params_hook) == 3*layers + 1, \
                    "Untied weights: expected {0!s} params, found {1!s}!".format(3*layers+1, len(self.params_hook))
                self.weights_list = self.params_hook[:2 * layers]
                self.bias_list = self.params_hook[2 * layers:]
        # NOTE(review): the `else:` for the no-hook case and the initializer calls inside the list
        # constructions below are truncated in this excerpt (function names, some keyword arguments and
        # closing brackets are missing) - TODO restore from upstream.
        # otherwise, construct our params
            # initialize a list of weights and biases based on layer_sizes for the GSN
            self.weights_list = [
                    shape=(self.layer_sizes[i], self.layer_sizes[i + 1]),
                    name="W_{0!s}_{1!s}".format(i, i + 1),
                    # if gaussian
                    # if uniform
                    interval=weights_interval) for i in range(layers)
            # add more weights if we aren't tying weights between layers (need to add for higher-lower layers now)
            if not tied_weights:
                        shape=(self.layer_sizes[i + 1], self.layer_sizes[i]),
                        name="W_{0!s}_{1!s}".format(i + 1, i),
                        # if gaussian
                        # if uniform
                    for i in reversed(range(layers))
            # initialize each layer bias to 0's.
            self.bias_list = [
                get_bias(shape=(self.layer_sizes[i], ),
                         name='b_' + str(i),
                         init_values=bias_init) for i in range(layers + 1)

        # build the params of the model into a list
        self.params = self.weights_list + self.bias_list
        log.debug("gsn params: %s", str(self.params))

        # using the properties, build the computational graph
        # NOTE(review): the argument list of this call is truncated in the excerpt - TODO restore.
        self.cost, self.monitors, self.output, self.hiddens = self.build_computation_graph(
Example #25
    def __init__(self, inputs_hook, params_hook=None, input_shape=None, filter_shape=None, stride=None,
                 weights_init=None, weights_interval=None, weights_mean=None, weights_std=None, bias_init=None,
                 border_mode=None, activation=None, convolution=None, config=None, defaults=defaults):
        # Set up a Conv1D layer: forward every constructor argument to the parent
        # initializer, resolve the input tensor and activation function, obtain the
        # filter weights `W` and bias `b`, then store the activated convolution in
        # `self.output`.
        # NOTE(review): this copy of the method looks truncated - several `else:`
        # headers and closing parentheses are absent - so it will not parse as-is.
        # Restore the missing lines from the upstream project before using it.
        # NOTE(review): `arg is not 'self'` below is an identity test against a string
        # literal (equality was presumably intended), and `iteritems()` only exists on
        # Python 2 dicts.
        # combine everything by passing to Model's init
        super(Conv1D, self).__init__(**{arg: val for (arg, val) in locals().iteritems() if arg is not 'self'})
        # configs can now be accessed through self dictionary

        # specifications #
        # grab info from the inputs_hook, or from parameters
        # expect input to be in the form (B, C, I) (batch, channel, input data)
        #  inputs_hook is a tuple of (Shape, Input)
        if self.inputs_hook is not None:
            # make sure inputs_hook is a tuple
            assert len(self.inputs_hook) == 2, "expecting inputs_hook to be tuple of (shape, input)"
            self.input_shape = inputs_hook[0] or self.input_shape
            self.input = inputs_hook[1]
            # NOTE(review): an `else:` header seems to be missing here; as written the
            # fresh placeholder below overwrites the hooked input assigned just above.
            # make the input a symbolic matrix
            self.input = T.ftensor3('X')

        # activation function!
        # if a string name was given, look up the correct function from our utils.
        # NOTE(review): `basestring` is a Python 2 builtin.
        if isinstance(self.activation, basestring):
            activation_func = get_activation_function(self.activation)
        # otherwise, if a 'callable' was passed (i.e. custom function), use that directly.
            # NOTE(review): presumably an `else:` header belongs above the next two
            # lines; without it they run on the string path as well - confirm.
            assert callable(self.activation), "Activation function either needs to be a string name or callable!"
            activation_func = self.activation

        # filter shape should be in the form (num_filters, num_channels, filter_length)
        num_filters = self.filter_shape[0]
        filter_length = self.filter_shape[2]

        # Params - make sure to deal with params_hook! #
        if self.params_hook:
            # make sure the params_hook has W and b
            assert len(self.params_hook) == 2, \
                "Expected 2 params (W and b) for Conv1D, found {0!s}!".format(len(self.params_hook))
            W, b = self.params_hook
            # NOTE(review): the initialisation below presumably belongs to an `else:`
            # branch (no params_hook supplied); the header is not present - confirm.
            # if we are initializing weights from a gaussian
            if self.weights_init.lower() == 'gaussian':
                W = get_weights_gaussian(
                    shape=self.filter_shape, mean=self.weights_mean, std=self.weights_std, name="W"
            # NOTE(review): the call above is never closed in this copy.
            # if we are initializing weights from a uniform distribution
            elif self.weights_init.lower() == 'uniform':
                W = get_weights_uniform(shape=self.filter_shape, interval=self.weights_interval, name="W")
            # otherwise not implemented
                # NOTE(review): the `else:` header and the right-hand operands of both
                # `%` format expressions below are missing.
                log.error("Did not recognize weights_init %s! Pleas try gaussian or uniform" %
                raise NotImplementedError(
                    "Did not recognize weights_init %s! Pleas try gaussian or uniform" %

            b = get_bias(shape=(num_filters,), name="b", init_values=self.bias_init)

        # Finally have the two parameters!
        self.params = [W, b]

        # Computational Graph! #
        # NOTE(review): `convolution` here is the constructor argument (default None),
        # not `self.convolution`; both calls below are cut off after their first argument.
        if self.border_mode in ['valid', 'full']:
            conved = convolution(self.input,
        elif self.border_mode == 'same':
            conved = convolution(self.input,
            # keep input_shape[2] positions of the last axis, starting (filter_length - 1) // 2 in
            shift = (filter_length - 1) // 2
            conved = conved[:, :, shift:self.input_shape[2] + shift]

            # NOTE(review): presumably an `else:` header is missing above; as written
            # these two lines sit inside the 'same' branch.
            log.error("Invalid border mode: '%s'" % self.border_mode)
            raise RuntimeError("Invalid border mode: '%s'" % self.border_mode)

        # dimshuffle('x', 0, 'x') reshapes the bias so it lines up with the middle axis of `conved`
        self.output = activation_func(conved + b.dimshuffle('x', 0, 'x'))
Example #26
File: gru.py Project: adammenges/OpenDeep
    def __init__(self, inputs=None, hiddens=None, params=None, outdir='outputs/gru/',
                 activation='relu', gate_activation='sigmoid',
                 weights_init='uniform', weights_interval='montreal', weights_mean=0, weights_std=5e-3,
                 r_weights_init='identity', r_weights_interval='montreal', r_weights_mean=0, r_weights_std=5e-3,
        # NOTE(review): the signature above is cut off - `direction` and
        # `clip_recurrent_grads` are used in the body, and `mrg`, `bias_init` and
        # `r_bias_init` are described below, but none of them is declared here. The
        # descriptive text that follows has also lost its docstring quotes. Restore
        # both from the upstream project before using this method.
        Initialize a GRU layer.

        inputs : List of [tuple(shape, `Theano.TensorType`)]
            The dimensionality of the inputs for this model, and the routing information for the model
            to accept inputs from elsewhere. `inputs` variable are expected to be of the form (timesteps, batch, data).
            `shape` will be a monad tuple representing known
            sizes for each dimension in the `Theano.TensorType`. The length of `shape` should be equal to number of
            dimensions in `Theano.TensorType`, where the shape element is an integer representing the size for its
            dimension, or None if the shape isn't known. For example, if you have a matrix with unknown batch size
            but fixed feature size of 784, `shape` would be: (None, 784). The full form of `inputs` would be:
            [((None, 784), <TensorType(float32, matrix)>)].
        hiddens : int or Tuple of (shape, `Theano.TensorType`)
            Int for the number of hidden units to use, or a tuple of shape, expression to route the starting
            hidden values from elsewhere.
        params : Dict(string_name: theano SharedVariable), optional
            A dictionary of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as siamese networks or pretraining some
        outdir : str
            The location to produce outputs from training or running the :class:`GRU`. If None, nothing will be saved.
        activation : str or callable
            The nonlinear (or linear) activation to perform for the hidden units.
            This activation function should be appropriate for the output unit types, i.e. 'sigmoid' for binary.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        gate_activation : str or callable
            The activation to perform for the hidden gates (default sigmoid).
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        mrg : random
            A random number generator that is used when adding noise.
            I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.
        weights_init : str
            Determines the method for initializing input-hidden model weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        r_weights_init : str
            Determines the method for initializing recurrent hidden-hidden model weights.
            See opendeep.utils.nnet for options.
        r_weights_interval : str or float
            If Uniform `r_weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        r_weights_mean : float
            If Gaussian `r_weights_init`, the mean value to use.
        r_weights_std : float
            If Gaussian `r_weights_init`, the standard deviation to use.
        r_bias_init : float
            The initial value to use for the recurrent bias parameter. Most often, the default of 0.0 is preferred.
        direction : str
            The direction this recurrent model should go over its inputs. Can be 'forward', 'backward', or
            'bidirectional'. In the case of 'bidirectional', it will make two passes over the sequence,
            computing two sets of hiddens and adding them together.
        clip_recurrent_grads : False or float, optional
            Whether to clip the gradients for the parameters that unroll over timesteps (such as the weights
            connecting previous hidden states to the current hidden state, and not the weights from current
            input to hiddens). If it is a float, the gradients for the weights will be hard clipped to the range
        # Snapshot every local name and forward the copy to the parent initializer.
        initial_parameters = locals().copy()
        super(GRU, self).__init__(**initial_parameters)

        # specifications #
        backward = direction.lower() == 'backward'
        bidirectional = direction.lower() == 'bidirectional'

        # activation functions #
        # recurrent hidden activation functions!
        self.hidden_activation_func = get_activation_function(activation)
        self.gate_activation_func = get_activation_function(gate_activation)

        # inputs #
        # inputs are expected to have the shape (n_timesteps, batch_size, data)
        if len(self.inputs) > 1:
            raise NotImplementedError("Expected 1 input, found %d. Please merge inputs before passing "
                                      "to the model!" % len(self.inputs))
        # self.inputs is a list of all the input expressions (we enforce only 1, so self.inputs[0] is the input)
        self.input_shape, self.input = self.inputs[0]
        # a bare int is expanded to a tuple: None for every leading axis, the int for the last one
        if isinstance(self.input_shape, int):
            self.input_shape = ((None, ) * (self.input.ndim-1)) + (self.input_shape, )
        assert self.input_shape is not None, "Need to specify the shape for at least the last dimension of the input!"
        # input is 3D tensor of (timesteps, batch_size, data_dim)
        # if input is 2D tensor, assume it is of the form (timesteps, data_dim) i.e. batch_size is 1. Convert to 3D.
        # if input is > 3D tensor, assume it is of form (timesteps, batch_size, data...) and flatten to 3D.
        if self.input.ndim == 1:
            self.input = unbroadcast(self.input.dimshuffle(0, 'x', 'x'), [1, 2])

        elif self.input.ndim == 2:
            self.input = unbroadcast(self.input.dimshuffle(0, 'x', 1), 1)

        elif self.input.ndim > 3:
            self.input = self.input.flatten(3)
            # NOTE(review): `(prod(...))` is a parenthesised value, not a one-element
            # tuple; if `input_shape` is a tuple and `prod` returns a number, this
            # concatenation raises TypeError. A trailing comma was presumably intended.
            self.input_shape = self.input_shape[:2] + (prod(self.input_shape[2:]))

        # hiddens #
        # have only 1 hiddens
        assert len(self.hiddens) == 1, "Expected 1 `hiddens` param, found %d" % len(self.hiddens)
        self.hiddens = self.hiddens[0]
        # if hiddens is an int (hidden size parameter, not routing info)
        h_init = None
        if isinstance(self.hiddens, int):
            self.hidden_size = self.hiddens
        elif isinstance(self.hiddens, tuple):
            hidden_shape, h_init = self.hiddens
            if isinstance(hidden_shape, int):
                self.hidden_size = hidden_shape
                # NOTE(review): an `else:` header seems to be missing above the next
                # line; as written it also runs when `hidden_shape` is an int.
                self.hidden_size = hidden_shape[-1]
            # NOTE(review): presumably an outer `else:` header is missing here, and the
            # `%` operand of the message below has been cut off.
            raise AssertionError("Hiddens need to be an int or tuple of (shape, theano_expression), found %s" %

        # output shape is going to be 3D with (timesteps, batch_size, hidden_size)
        self.output_size = (None, None, self.hidden_size)

        # parameters - make sure to deal with params dict input! #
        # NOTE(review): each comprehension below has lost the call that wraps its
        # fragments as well as its closing brackets; only the names, shapes and the
        # 'z' / 'r' / 'h' suffixes survive.
        # all input-to-hidden weights
        W_z, W_r, W_h = [
                "W_%s" % sub,
                            shape=(self.input_shape[-1], self.hidden_size),
                            name="W_%s" % sub,
                            # if gaussian
                            # if uniform
            for sub in ['z', 'r', 'h']
        # all hidden-to-hidden (one direction) weights
        U_z, U_r, U_h = [
                "U_%s" % sub,
                            shape=(self.hidden_size, self.hidden_size),
                            name="U_%s" % sub,
                            # if gaussian
                            # if uniform
            for sub in ['z', 'r', 'h']
        # if bidirectional, make hidden-to-hidden weights again to go the opposite direction
        U_z_b, U_r_b, U_h_b = None, None, None
        if bidirectional:
            U_z_b, U_r_b, U_h_b = [
                    "U_%s_b" % sub,
                                shape=(self.hidden_size, self.hidden_size),
                                name="U_%s_b" % sub,
                                # if gaussian
                                # if uniform
                for sub in ['z', 'r', 'h']
        # biases
        b_z, b_r, b_h = [
                "b_%s" % sub,
                         name="b_%s" % sub,
            for sub in ['z', 'r', 'h']
        # clip gradients if we are doing that
        r_params = [U_z, U_r, U_h, U_z_b, U_r_b, U_h_b]
        if clip_recurrent_grads:
            # abs() makes the clipping bounds symmetric whatever the sign of the argument
            clip = abs(clip_recurrent_grads)
            # the backward matrices are None unless bidirectional, so None entries pass through untouched
            U_z, U_r, U_h, U_z_b, U_r_b, U_h_b = [
                grad_clip(param, -clip, clip) if param is not None
                else None
                for param in r_params

        # put all the parameters into our dictionary
        self.params = {
            "W_z": W_z,
            "W_r": W_r,
            "W_h": W_h,

            "U_z": U_z,
            "U_r": U_r,
            "U_h": U_h,

            "b_z": b_z,
            "b_r": b_r,
            "b_h": b_h,
        # NOTE(review): the dict literal above is never closed, and the statement that
        # adds the three backward entries below is missing.
        if bidirectional:
                    "U_z_b": U_z_b,
                    "U_r_b": U_r_b,
                    "U_h_b": U_h_b,

        # make h_init the right sized tensor
        if h_init is None:
            # zeros shaped like the projection of the first element along axis 0 of the input through W_h
            h_init = zeros_like(dot(self.input[0], W_h))

        # computation #
        # move some computation outside of scan to speed it up!
        x_z = dot(self.input, W_z) + b_z
        x_r = dot(self.input, W_r) + b_r
        x_h = dot(self.input, W_h) + b_h

        # now do the recurrent stuff
        # NOTE(review): the step function and the remaining arguments of both `scan`
        # calls are missing from this copy.
        self.hiddens, self.updates = scan(
            sequences=[x_z, x_r, x_h],
            non_sequences=[U_z, U_r, U_h],

        # if bidirectional, do the same in reverse!
        if bidirectional:
            hiddens_b, updates_b = scan(
                sequences=[x_z, x_r, x_h],
                non_sequences=[U_z_b, U_r_b, U_h_b],
                go_backwards=not backward,
            # flip the hiddens to be the right direction
            hiddens_b = hiddens_b[::-1]
            # update stuff
            # the two passes are merged by elementwise addition
            # NOTE(review): `updates_b` is not used in the lines visible here.
            self.hiddens += hiddens_b

        log.info("Initialized a GRU!")
    def __init__(self, inputs=None, hiddens=None, params=None, outdir='outputs/rbm/',
                 visible_activation='sigmoid', hidden_activation='sigmoid',
                 weights_init='uniform', weights_mean=0, weights_std=5e-3, weights_interval='montreal',
        # NOTE(review): the signature above is cut off - `bias_init` and `mrg` are used
        # in the body and `k` is described below, but none of them is declared here.
        # The descriptive text that follows has also lost its docstring quotes. Restore
        # both from the upstream project before using this method.
        RBM constructor. Defines the parameters of the model along with
        basic operations for inferring hidden from visible (and vice-versa),
        as well as for performing CD updates.

        inputs : List of [tuple(shape, `Theano.TensorType`)]
            The dimensionality of the inputs for this model, and the routing information for the model
            to accept inputs from elsewhere. `inputs` variable are expected to be of the form (timesteps, batch, data).
            `shape` will be a monad tuple representing known
            sizes for each dimension in the `Theano.TensorType`. The length of `shape` should be equal to number of
            dimensions in `Theano.TensorType`, where the shape element is an integer representing the size for its
            dimension, or None if the shape isn't known. For example, if you have a matrix with unknown batch size
            but fixed feature size of 784, `shape` would be: (None, 784). The full form of `inputs` would be:
            [((None, 784), <TensorType(float32, matrix)>)].
        hiddens : int or Tuple of (shape, `Theano.TensorType`)
            Int for the number of hidden units to use, or a tuple of shape, expression to route the starting
            hidden values from elsewhere.
        params : Dict(string_name: theano SharedVariable), optional
            A dictionary of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as siamese networks or pretraining some
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        visible_activation : str or callable
            The nonlinear (or linear) visible activation to perform after the dot product from hiddens -> visible layer.
            This activation function should be appropriate for the input unit types, i.e. 'sigmoid' for binary inputs.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        hidden_activation : str or callable
            The nonlinear (or linear) hidden activation to perform after the dot product from visible -> hiddens layer.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        weights_init : str
            Determines the method for initializing model weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        mrg : random
            A random number generator that is used when sampling. The RBM is a probabilistic model, so it relies a lot
            on sampling. I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.
        k : int
            The k number of steps used for CD-k or PCD-k with Gibbs sampling. Basically, the number of samples
            generated from the model to train against reconstructing the original input.
        # init Model to combine the defaults and config dictionaries with the initial parameters.
        initial_parameters = locals().copy()
        super(RBM, self).__init__(**initial_parameters)

        # specifications #
        if len(self.inputs) > 1:
            raise NotImplementedError("Expected 1 input to RBM, found %d. Please merge inputs before passing "
                                      "to the model!" % len(self.inputs))
        # self.inputs is a list of all the input expressions (we enforce only 1, so self.inputs[0] is the input)
        input_shape, self.input = self.inputs[0]
        # a bare int is expanded to a tuple: None for every leading axis, the int for the last one
        if isinstance(input_shape, int):
            self.input_size = ((None,) * (self.input.ndim - 1)) + (input_shape,)
            # NOTE(review): an `else:` header seems to be missing above the next line;
            # as written it discards the tuple that was just built.
            self.input_size = input_shape
        assert self.input_size is not None, "Need to specify the shape for the last dimension of the input!"

        # our output space is the same as the input space
        self.output_size = self.input_size

        # grab hiddens
        # have only 1 hiddens
        assert len(self.hiddens) == 1, "Expected 1 `hiddens` param, found %d" % len(self.hiddens)
        self.hiddens = self.hiddens[0]
        if isinstance(self.hiddens, int):
            hidden_size = self.hiddens
            hiddens_init = None
        elif isinstance(self.hiddens, tuple):
            hidden_shape, hiddens_init = self.hiddens
            if isinstance(hidden_shape, int):
                hidden_size = hidden_shape
                # NOTE(review): an `else:` header seems to be missing above the next
                # line; as written it also runs when `hidden_shape` is an int.
                hidden_size = hidden_shape[-1]
            # NOTE(review): presumably an outer `else:` header is missing here, and the
            # `%` operand of the message below has been cut off.
            raise AssertionError("Hiddens need to be an int or tuple of (shape, theano_expression), found %s" %

        # other specifications
        # visible activation function!
        self.visible_activation_func = get_activation_function(visible_activation)

        # make sure the sampling functions are appropriate for the activation functions.
        if is_binary(self.visible_activation_func):
            self.visible_sampling = mrg.binomial
            # NOTE(review): presumably an `else:` header is missing above the TODO; as
            # written the binary path always reaches the raise below.
            # TODO: implement non-binary activation
            log.error("Non-binary visible activation not supported yet!")
            raise NotImplementedError("Non-binary visible activation not supported yet!")

        # hidden activation function!
        self.hidden_activation_func = get_activation_function(hidden_activation)

        # make sure the sampling functions are appropriate for the activation functions.
        if is_binary(self.hidden_activation_func):
            self.hidden_sampling = mrg.binomial
            # NOTE(review): presumably an `else:` header is missing above the TODO; as
            # written the binary path always reaches the raise below.
            # TODO: implement non-binary activation
            log.error("Non-binary hidden activation not supported yet!")
            raise NotImplementedError("Non-binary hidden activation not supported yet!")

        # parameters - make sure to deal with params_hook! #
        # NOTE(review): the three `self.params.get(` calls below have lost their lookup
        # keys and closing parentheses; only the fallback constructors survive.
        self.W = self.params.get(
                        shape=(self.input_size[-1], hidden_size),
                        # if gaussian
                        # if uniform
        self.b_v = self.params.get(
            get_bias(shape=self.input_size[-1], name="b_v", init_values=bias_init)
        self.b_h = self.params.get(
            get_bias(shape=hidden_size, name="b_h", init_values=bias_init)

        # Finally have the parameters
        self.params = {"W": self.W, "b_v": self.b_v, "b_h": self.b_h}

        # computation #
        # NOTE(review): both `theano.scan` calls below are cut off after `outputs_info`,
        # and the `else:` header between them is missing.
        # initialize from visibles if we aren't generating from some hiddens
        if hiddens_init is None:
            [_, v_chain, _, h_chain], self.updates = theano.scan(fn=self._gibbs_step_vhv,
                                                                 outputs_info=[None, self.input, None, None],
        # initialize from hiddens
            [_, v_chain, _, h_chain], self.updates = theano.scan(fn=self._gibbs_step_hvh,
                                                                 outputs_info=[None, None, None, hiddens_init],

        # keep only the final step of each chain
        self.v_sample = v_chain[-1]
        self.h_sample = h_chain[-1]

        # NOTE(review): `mean_v` is not used in the lines visible here.
        mean_v, _, _, _ = self._gibbs_step_vhv(self.v_sample)

        # the free-energy cost function!
        # consider v_sample constant when computing gradients on the cost function
        # this actually keeps v_sample from being considered in the gradient, to set gradient to 0 instead,
        # use theano.gradient.zero_grad
        v_sample_constant = theano.gradient.disconnected_grad(self.v_sample)
        # v_sample_constant = v_sample
        # free-energy gap between the input and the sample, divided by the length of the input's first axis
        self.cost = (self.free_energy(self.input) - self.free_energy(v_sample_constant)) / self.input.shape[0]

        log.debug("Initialized an RBM shape %s",
                  str((self.input_size, hidden_size)))
    def build_computation_graph(self):
        # NOTE(review): the descriptive text below has lost its docstring quotes, and
        # many statements further down have lost their `else:` headers, wrapping calls
        # and closing brackets. Restore them from the upstream project before use.
        Creates the output, hiddens, updates, cost, and parameters for the RNN!

        Output, top-level hiddens, updates, cost, and parameters for the RNN.
        # parameters - make sure to deal with params_hook! #
        if self.params_hook is not None:
            # expect at least W_{x_h}, W_{h_h}, W_{h_y}, b_h, b_y -> this is for single-direction RNN.
            assert len(self.params_hook) >= 3*self.layers+2, \
                "Expected at least {0!s} params for rnn, found {1!s}!".format(3*self.layers+2, len(self.params_hook))
            # slice order: `layers` input weights, `layers` recurrent weights, `layers`
            # hidden biases, then the single output weight and output bias
            W_x_h = self.params_hook[:self.layers]
            W_h_h = self.params_hook[self.layers:2 * self.layers]
            b_h = self.params_hook[2 * self.layers:3 * self.layers]
            W_h_y = self.params_hook[3 * self.layers]
            b_y = self.params_hook[3 * self.layers + 1]
            # now the case for extra parameters dealing with a backward pass in addition to forward (bidirectional)
            if self.bidirectional:
                assert len(self.params_hook) >= 4*self.layers+2, \
                    "Expected at least {0!s} params for bidirectional (merging hiddens) rnn, found {1!s}!".format(
                        4*self.layers+2, len(self.params_hook))
                # if we are merging according to DeepSpeech paper, this is all we need in addition for bidirectional.
                # NOTE(review): the slice below is cut off before its upper bound.
                W_h_hb = self.params_hook[3 * self.layers + 2:4 * self.layers +
        # otherwise, construct our params
            # NOTE(review): presumably an `else:` header belongs above this branch.
            # input-to-hidden (and hidden-to-hidden higher layer) weights
            W_x_h = []
            for l in range(self.layers):
                if l > 0:
                            # for l > 0 the weight is (hidden_size, hidden_size); the second
                            # fragment is (input_size, hidden_size). The wrapping call and the
                            # `else:` between the two fragments are missing.
                            shape=(self.hidden_size, self.hidden_size),
                            name="W_%d_%d" % (l, l + 1),
                            # if gaussian
                            # if uniform
                            shape=(self.input_size, self.hidden_size),
                            name="W_%d_%d" % (l, l + 1),
                            # if gaussian
                            # if uniform
            # hidden-to-hidden same layer weights
            W_h_h = [
                    shape=(self.hidden_size, self.hidden_size),
                    name="W_%d_%d" % (l + 1, l + 1),
                    # if gaussian
                    # if uniform
                for l in range(self.layers)
            # hidden-to-output weights
            W_h_y = get_weights(
                shape=(self.hidden_size, self.output_size),
                # if gaussian
                # if uniform
            # hidden bias for each layer
            b_h = [
                get_bias(shape=(self.hidden_size, ),
                         name="b_h_%d" % (l + 1),
                for l in range(self.layers)
            # output bias
            b_y = get_bias(shape=(self.output_size, ),
            # extra parameters necessary for second backward pass on hiddens if this is bidirectional
            if self.bidirectional:
                # hidden-to-hidden same layer backward weights.
                W_h_hb = [
                        shape=(self.hidden_size, self.hidden_size),
                        name="W_%d_%db" % (l + 1, l + 1),
                        # if gaussian
                        # if uniform
                    for l in range(self.layers)

        # put all the parameters into our list, and make sure it is in the same order as when we try to load
        # them from a params_hook!!!
        params = W_x_h + W_h_h + b_h + [W_h_y] + [b_y]
        if self.bidirectional:
            params += W_h_hb

        # make h_init the right sized tensor
        if not self.hiddens_hook:
            # zeros shaped like the projection of the first element along axis 0 of the input through layer 0
            self.h_init = T.zeros_like(T.dot(self.input[0], W_x_h[0]))

        # computation #
        hiddens = self.input
        updates = dict()
        # vanilla case! there will be only 1 hidden layer for each depth layer.
        for layer in range(self.layers):
            log.debug("Updating hidden layer %d" % (layer + 1))
            # normal case - either forward or just backward!
            # NOTE(review): this rebinds `updates` on every layer, so the dict created
            # above and any earlier layer's updates are dropped; the step function and
            # most `scan` arguments are also missing from this copy.
            hiddens_new, updates = theano.scan(
                non_sequences=[W_x_h[layer], W_h_h[layer], b_h[layer]],
                name="rnn_scan_normal_%d" % layer,

            # bidirectional case - need to add a backward sequential pass to compute new hiddens!
            if self.bidirectional:
                # now do the opposite direction for the scan!
                # NOTE(review): `updates_opposite` is not used in the lines visible here.
                hiddens_opposite, updates_opposite = theano.scan(
                    non_sequences=[W_x_h[layer], W_h_hb[layer], b_h[layer]],
                    go_backwards=(not self.backward),
                    name="rnn_scan_backward_%d" % layer,
                # the two passes are merged by elementwise addition
                hiddens_new = hiddens_new + hiddens_opposite

            # replace the hiddens with the newly computed hiddens (and add noise)!
            hiddens = hiddens_new
            # add noise (like dropout) if we wanted it!
            if self.noise:
                # NOTE(review): this assigns `self.hiddens`, not the local `hiddens`
                # that feeds the next layer and the output; the call is also cut off.
                self.hiddens = T.switch(self.noise_switch,

        # now compute the outputs from the leftover (top level) hiddens
        output = self.activation_func(T.dot(hiddens, W_h_y) + b_y)

        # now to define the cost of the model - use the cost function to compare our output with the target value.
        # NOTE(review): the call below is cut off after its first keyword argument.
        cost = self.cost_function(output=output,

        log.info("Initialized a %s RNN!" % self.direction)
        # callers must unpack in exactly this order
        return output, hiddens, updates, cost, params
Example #29
    def __init__(self,
        Initialize a simple recurrent network.

        inputs_hook : Tuple of (shape, variable)
            Routing information for the model to accept inputs from elsewhere. This is used for linking
            different models together (e.g. setting the Softmax model's input layer to the DAE's hidden layer gives a
            newly supervised classification model). For now, it needs to include the shape information (normally the
            dimensionality of the input i.e. n_in).
        hiddens_hook : Tuple of (shape, variable)
            Routing information for the model to accept its hidden representation from elsewhere. For recurrent nets,
            this will be the initial starting value for hidden layers.
        params_hook : List(theano shared variable)
            A list of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters.
        outdir : str
            The location to produce outputs from training or running the :class:`RNN`. If None, nothing will be saved.
        input_size : int
            The size (dimensionality) of the input. If shape is provided in `inputs_hook`, this is optional.
        hidden_size : int
            The size (dimensionality) of the hidden layers. If shape is provided in `hiddens_hook`, this is optional.
        output_size : int
            The size (dimensionality) of the output.
        activation : str or callable
            The nonlinear (or linear) activation to perform after the dot product from hiddens -> output layer.
            This activation function should be appropriate for the output unit types, i.e. 'sigmoid' for binary.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        hidden_activation : str or callable
            The activation to perform for the hidden units.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        inner_hidden_activation : str or callable
            The activation to perform for the hidden gates.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        mrg : random
            A random number generator that is used when adding noise.
            I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.
        weights_init : str
            Determines the method for initializing model weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        r_weights_init : str
            Determines the method for initializing recurrent model weights. See opendeep.utils.nnet for options.
        r_weights_interval : str or float
            If Uniform `r_weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        r_weights_mean : float
            If Gaussian `r_weights_init`, the mean value to use.
        r_weights_std : float
            If Gaussian `r_weights_init`, the standard deviation to use.
        r_bias_init : float
            The initial value to use for the recurrent bias parameter. Most often, the default of 0.0 is preferred.
        cost_function : str or callable
            The function to use when calculating the output cost of the model.
            See opendeep.utils.cost for options. You can also specify your own function, which needs to be callable.
        cost_args : dict
            Any additional named keyword arguments to pass to the specified `cost_function`.
        noise : str
            What type of noise to use for the hidden layers and outputs. See opendeep.utils.noise
            for options. This should be appropriate for the unit activation, i.e. Gaussian for tanh or other
            real-valued activations, etc.
        noise_level : float
            The amount of noise to use for the noise function specified by `noise`. This could be the
            standard deviation for gaussian noise, the interval for uniform noise, the dropout amount, etc.
        noise_decay : str or False
            Whether to use `noise` scheduling (decay `noise_level` during the course of training),
            and if so, the string input specifies what type of decay to use. See opendeep.utils.decay for options.
            Noise decay (known as noise scheduling) effectively helps the model learn larger variance features first,
            and then smaller ones later (almost as a kind of curriculum learning). May help it converge faster.
        noise_decay_amount : float
            The amount to reduce the `noise_level` after each training epoch based on the decay function specified
            in `noise_decay`.
        forward : bool
            The direction this recurrent model should go over its inputs. True means forward, False means backward.
        clip_recurrent_grads : False or float, optional
            Whether to clip the gradients for the parameters that unroll over timesteps (such as the weights
            connecting previous hidden states to the current hidden state, and not the weights from current
            input to hiddens). If it is a float, the gradients for the weights will be hard clipped to the range
            [-abs(clip_recurrent_grads), +abs(clip_recurrent_grads)].
        initial_parameters = locals().copy()
        super(GRU, self).__init__(**initial_parameters)

        # specifications #

        # activation, cost, and noise functions #
        # recurrent hidden activation function!
        self.hidden_activation_func = get_activation_function(
        self.inner_hidden_activation_func = get_activation_function(

        # output activation function!
        activation_func = get_activation_function(activation)

        # Cost function
        cost_function = get_cost_function(cost_function)
        cost_args = cost_args or dict()

        # Now deal with noise if we added it:
        if noise:
            log.debug('Adding %s noise switch.' % str(noise))
            if noise_level is not None:
                noise_level = sharedX(value=noise_level)
                noise_func = get_noise(noise, noise_level=noise_level, mrg=mrg)
                noise_func = get_noise(noise, mrg=mrg)
            # apply the noise as a switch!
            # default to apply noise. this is for the cost and gradient functions to be computed later
            # (not sure if the above statement is accurate such that gradient depends on initial value of switch)
            self.noise_switch = sharedX(value=1, name="gru_noise_switch")

            # noise scheduling
            if noise_decay and noise_level is not None:
                self.noise_schedule = get_decay_function(
                    noise_decay, noise_level, noise_level.get_value(),

        # inputs hook #
        # grab info from the inputs_hook
        # in the case of an inputs_hook, recurrent will always work with the leading tensor dimension
        # being the temporal dimension.
        # input is 3D tensor of (timesteps, batch_size, data_dim)
        # if input is 2D tensor, assume it is of the form (timesteps, data_dim) i.e. batch_size is 1. Convert to 3D.
        # if input is > 3D tensor, assume it is of form (timesteps, batch_size, data...) and flatten to 3D.
        if self.inputs_hook is not None:
            self.input = self.inputs_hook[1]

            if self.input.ndim == 1:
                self.input = T.unbroadcast(self.input.dimshuffle(0, 'x', 'x'),
                                           [1, 2])
                self.input_size = 1

            elif self.input.ndim == 2:
                self.input = T.unbroadcast(self.input.dimshuffle(0, 'x', 1), 1)

            elif self.input.ndim > 3:
                self.input = self.input.flatten(3)
                self.input_size = sum(self.input_size)
                raise NotImplementedError(
                    "Recurrent input with %d dimensions not supported!" %
            xs = self.input
            # Assume input coming from optimizer is (batches, timesteps, data)
            # so, we need to reshape to (timesteps, batches, data)
            self.input = T.tensor3("Xs")
            xs = self.input.dimshuffle(1, 0, 2)

        # The target outputs for supervised training - in the form of (batches, timesteps, output) which is
        # the same dimension ordering as the expected input from optimizer.
        # therefore, we need to swap it like we did to input xs.
        self.target = T.tensor3("Ys")
        ys = self.target.dimshuffle(1, 0, 2)

        # hiddens hook #
        # set an initial value for the recurrent hiddens from hook
        if self.hiddens_hook is not None:
            h_init = self.hiddens_hook[1]
            self.hidden_size = self.hiddens_hook[0]
            # deal with h_init after parameters are made (have to make the same size as hiddens that are computed)
            self.hidden_size = hidden_size

        # for generating #
        # symbolic scalar for how many recurrent steps to use during generation from the model
        self.n_steps = T.iscalar("generate_n_steps")

        # parameters - make sure to deal with params_hook! #
        if self.params_hook is not None:
            (W_x_z, W_x_r, W_x_h, U_h_z, U_h_r, U_h_h, W_h_y, b_z, b_r, b_h,
             b_y) = self.params_hook
            recurrent_params = [U_h_z, U_h_r, U_h_h]
        # otherwise, construct our params
            # all input-to-hidden weights
            W_x_z, W_x_r, W_x_h = [
                    shape=(self.input_size, self.hidden_size),
                    name="W_x_%s" % sub,
                    # if gaussian
                    # if uniform
                    interval=weights_interval) for sub in ['z', 'r', 'h']
            # all hidden-to-hidden weights
            U_h_z, U_h_r, U_h_h = [
                    shape=(self.hidden_size, self.hidden_size),
                    name="U_h_%s" % sub,
                    # if gaussian
                    # if uniform
                    interval=r_weights_interval) for sub in ['z', 'r', 'h']
            # hidden-to-output weights
            W_h_y = get_weights(
                shape=(self.hidden_size, self.output_size),
                # if gaussian
                # if uniform
            # biases
            b_z, b_r, b_h = [
                get_bias(shape=(self.hidden_size, ),
                         name="b_%s" % sub,
                         init_values=r_bias_init) for sub in ['z', 'r', 'h']
            # output bias
            b_y = get_bias(shape=(self.output_size, ),
            # clip gradients if we are doing that
            recurrent_params = [U_h_z, U_h_r, U_h_h]
            if clip_recurrent_grads:
                clip = abs(clip_recurrent_grads)
                U_h_z, U_h_r, U_h_h = [
                    theano.gradient.grad_clip(p, -clip, clip)
                    for p in recurrent_params

        # put all the parameters into our list, and make sure it is in the same order as when we try to load
        # them from a params_hook!!!
        self.params = [W_x_z, W_x_r, W_x_h
                       ] + recurrent_params + [W_h_y, b_z, b_r, b_h, b_y]

        # make h_init the right sized tensor
        if not self.hiddens_hook:
            h_init = T.zeros_like(T.dot(xs[0], W_x_h))

        # computation #
        # move some computation outside of scan to speed it up!
        x_z = T.dot(xs, W_x_z) + b_z
        x_r = T.dot(xs, W_x_r) + b_r
        x_h = T.dot(xs, W_x_h) + b_h

        # now do the recurrent stuff
        self.hiddens, self.updates = theano.scan(
            sequences=[x_z, x_r, x_h],
            non_sequences=[U_h_z, U_h_r, U_h_h],
            go_backwards=not forward,

        # add noise (like dropout) if we wanted it!
        if noise:
            self.hiddens = T.switch(self.noise_switch,

        # now compute the outputs from the leftover (top level) hiddens
        self.output = activation_func(T.dot(self.hiddens, W_h_y) + b_y)

        # now to define the cost of the model - use the cost function to compare our output with the target value.
        self.cost = cost_function(output=self.output, target=ys, **cost_args)

        log.info("Initialized a GRU!")
예제 #30
    def __init__(self, inputs=None, params=None, outdir='outputs/conv2d',
                 n_filters=None, filter_size=None, stride=(1, 1), border_mode='valid',
                 weights_init='uniform', weights_interval='montreal', weights_mean=0, weights_std=5e-3,
        Initialize a 2-dimensional convolutional layer.

        inputs : tuple(shape, `Theano.TensorType`)
            The dimensionality of the inputs for this model, and the routing information for the model
            to accept inputs from elsewhere. `shape` will be a monad tuple representing known
            sizes for each dimension in the `Theano.TensorType`. Shape of the incoming data:
            (batch_size, num_channels, input_height, input_width).
            If input_size is None, it can be inferred. However, border_mode can't be 'same'.
        params : Dict(string_name: theano SharedVariable), optional
            A dictionary of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as siamese networks or pretraining some
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        n_filters : int
            The number of filters to use (convolution kernels).
        filter_size : tuple(int) or int
            (filter_height, filter_width). If it is an int, size will be duplicated across height and width.
        stride : tuple(int)
            The distance between the receptive field centers of neighboring units. This is the 'stride' of the
            convolution operation.
        border_mode : str, one of 'valid', 'full'
            A string indicating the convolution border mode.
            If 'valid', the convolution is only computed where the input and the
            filter fully overlap.
            If 'full', the convolution is computed wherever the input and the
            filter overlap by at least one position.
        weights_init : str
            Determines the method for initializing model weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        activation : str or Callable
            The activation function to apply to the layer. See opendeep.utils.activation for options.
        convolution : str or Callable
            The 2-dimensional convolution implementation to use. The default of 'conv2d' is normally fine because it
            uses theano's tensor.nnet.conv.conv2d, which cherry-picks the best implementation with a meta-optimizer if
            you set the theano configuration flag 'optimizer_including=conv_meta'. Otherwise, you could pass a
            callable function, such as cudnn or cuda-convnet if you don't want to use the meta-optimizer.
        mrg : random
            A random number generator that is used when adding noise.
            I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.

        Theano's default convolution function (`theano.tensor.nnet.conv.conv2d`)
        does not support the 'same' border mode by default. This layer emulates
        it by performing a 'full' convolution and then cropping the result, which
        may negatively affect performance.
        # NOTE(review): the signature above is cut off (no closing parenthesis) and the docstring has lost its
        # triple quotes in this excerpt - restore both from the original file before running this code.
        # Forward every constructor argument except `self` to the superclass initializer.
        # NOTE(review): `arg is not 'self'` is an identity test against a string literal; `!=` is the reliable
        # comparison (Python 3.8+ emits a SyntaxWarning for `is not` with a literal) - confirm and fix.
        super(Conv2D, self).__init__(**{arg: val for (arg, val) in locals().items() if arg is not 'self'})

        # specifications #
        # expect input to be in the form (B, C, 0, 1) (batch, channel, rows, cols)
        # self.inputs is a list of all the input expressions (we enforce only 1, so self.inputs[0] is the input)
        input_shape, self.input = self.inputs[0]
        assert self.input.ndim == 4, "Expected 4D input variable with form (batch, channel, rows, cols)"
        assert len(input_shape) == 4, "Expected 4D input shape with form (batch, channel, rows, cols)"

        # the channel count is the second entry of the (batch, channel, rows, cols) shape
        n_channels = input_shape[1]

        # a single int filter size is duplicated into a 2-tuple (height, width)
        if isinstance(filter_size, int):
            filter_size = (filter_size, )*2

        # activation function!
        activation_func = get_activation_function(activation)

        # convolution function!
        if convolution == 'conv2d':
            # using the theano flag optimizer_including=conv_meta will let this conv function optimize itself.
            convolution_func = conv2d
            # NOTE(review): an `else:` line appears to be missing here in this excerpt; as written, the assert and
            # the reassignment below sit inside the 'conv2d' branch. Presumably they belong to the callable case.
            assert callable(convolution), "Input convolution was not 'conv2d' and was not Callable."
            convolution_func = convolution

        # filter shape should be in the form (num_filters, num_channels, filter_size[0], filter_size[1])

        # NOTE(review): the arguments of this call are missing from the excerpt (unclosed parenthesis).
        outshape = ConvOp.getOutputShape(
        # output shape is (batch, n_filters) followed by the spatial dimensions computed above
        self.output_size = (input_shape[0], n_filters) + outshape

        filter_shape = (n_filters, n_channels) + filter_size

        # Params #
        # Both parameters are looked up in `self.params` first.
        # NOTE(review): the remaining arguments of both `.get(` calls are cut off in this excerpt; presumably the
        # second argument is the freshly initialized fallback value - verify against the original file.
        W = self.params.get(
                        # if gaussian
                        # if uniform

        b = self.params.get(
            get_bias(shape=(n_filters, ), name="b", init_values=bias_init)

        # Finally have the two parameters!
        self.params = OrderedDict([("W", W), ("b", b)])

        # Computational Graph! #
        if border_mode in ['valid', 'full']:
            conved = convolution_func(self.input,
            # NOTE(review): the rest of the convolution call and an `else:` line appear to be missing in this
            # excerpt; the raise below is presumably the handler for unsupported border modes.
            raise RuntimeError("Invalid border mode: '%s'" % border_mode)

        # the bias has one entry per filter; dimshuffle makes it broadcastable over batch, rows and cols
        self.output = activation_func(conved + b.dimshuffle('x', 0, 'x', 'x'))
예제 #31
    def __init__(self,
                 input_size=28 * 28,
        # NOTE(review): the rest of the signature is missing from this excerpt; the body below also reads
        # inputs_hook, hiddens_hook, hidden_activation, visible_activation, cost_function and noise_level.
        # initialize the Model superclass
        # Every local except `self` is forwarded to the superclass as a keyword argument.
        # NOTE(review): `dict.iteritems()` exists only on Python 2 (use `.items()` on Python 3), and
        # `arg is not 'self'` is an identity test against a string literal (`!=` is the reliable comparison)
        # - confirm the target Python version and fix.
        super(DenoisingAutoencoder, self).__init__(**{
            arg: val
            for (arg, val) in locals().iteritems() if arg is not 'self'

        # Define model hyperparameters
        # deal with the inputs_hook and hiddens_hook for the size parameters!
        # if the hook exists, grab the size from the first element of the tuple.
        if self.inputs_hook is not None:
            assert len(self.inputs_hook
                       ) == 2, "Was expecting inputs_hook to be a tuple."
            self.input_size = inputs_hook[0]

        # note: the hidden size is only rebound as the local `hidden_size`, whereas the input size
        # above is stored on `self`
        if self.hiddens_hook is not None:
            assert len(self.hiddens_hook
                       ) == 2, "was expecting hiddens_hook to be a tuple."
            hidden_size = hiddens_hook[0]

        # use the helper methods to grab appropriate activation functions from names!
        hidden_activation = get_activation_function(hidden_activation)
        visible_activation = get_activation_function(visible_activation)

        # do the same for the cost function
        cost_function = get_cost_function(cost_function)

        # Now, define the symbolic input to the model (Theano)
        # We use a matrix rather than a vector so that minibatch processing can be done in parallel.
        # Make sure to deal with 'inputs_hook' if it exists!
        if self.inputs_hook is not None:
            # grab the new input variable from the inputs_hook tuple
            x = self.inputs_hook[1]
            # NOTE(review): an `else:` line appears to be missing here in this excerpt; as written, the
            # fresh matrix below overwrites the hooked input.
            x = T.matrix("X")
        self.inputs = [x]

        # Build the model's parameters - a weight matrix and two bias vectors
        # Make sure to deal with 'params_hook' if it exists!
        if self.params_hook:
            # check to see if it contains the three necessary variables
            assert len(self.params_hook
                       ) == 3, "Not correct number of params to DAE, needs 3!"
            W, b0, b1 = self.params_hook
            # NOTE(review): an `else:` line and the tail of the get_weights_uniform call appear to be
            # missing in this excerpt; presumably the three initializations below are the no-hook case.
            W = get_weights_uniform(shape=(self.input_size, hidden_size),
            b0 = get_bias(shape=self.input_size, name="b0")
            b1 = get_bias(shape=hidden_size, name="b1")
        self.params = [W, b0, b1]

        # Perform the computation for a denoising autoencoder!
        # first, add noise (corrupt) the input
        corrupted_input = salt_and_pepper(input=x, noise_level=noise_level)
        # next, run the hidden layer given the inputs (the encoding function)
        # We don't need to worry about hiddens_hook during training, because we can't
        # run a cost without having the input!
        # hiddens_hook is more for the run function and linking methods below.
        hiddens = hidden_activation(T.dot(corrupted_input, W) + b1)
        # finally, create the reconstruction from the hidden layer (we tie the weights with W.T)
        reconstruction = visible_activation(T.dot(hiddens, W.T) + b0)
        # the training cost is reconstruction error
        self.train_cost = cost_function(output=reconstruction, target=x)

        # Compile everything into a Theano function for prediction!
        # When using real-world data in predictions, we wouldn't corrupt the input first.
        # Therefore, create another version of the hiddens and reconstruction without adding the noise.
        # Here is where we would handle hiddens_hook because this is a generative model!
        # For the run function, it would take in the hiddens instead of the input variable x.
        if self.hiddens_hook is not None:
            self.hiddens = self.hiddens_hook[1]
            # NOTE(review): an `else:` line appears to be missing here in this excerpt; as written, the
            # hooked hiddens are immediately replaced by the noise-free encoding of x.
            self.hiddens = hidden_activation(T.dot(x, W) + b1)
        # make the reconstruction (generated) from the hiddens
        self.recon_predict = visible_activation(T.dot(self.hiddens, W.T) + b0)
        # now compile the run function accordingly - if it used x or hiddens as the input.
        if self.hiddens_hook is not None:
            self.f_run = function(inputs=[self.hiddens],
            # NOTE(review): the tail of the call above and an `else:` line appear to be missing in this
            # excerpt; presumably the line below is the variant used when there is no hiddens_hook.
            self.f_run = function(inputs=[x], outputs=self.recon_predict)
예제 #32
    def __init__(self, inputs_hook=None, hiddens_hook=None, params_hook=None, outdir=None,
                 input_size=None, hidden_size=None,
                 layers=2, walkbacks=4,
                 visible_activation='sigmoid', hidden_activation='tanh',
                 input_sampling=True, mrg=RNG_MRG.MRG_RandomStreams(1),
                 weights_init='uniform', weights_interval='montreal', weights_mean=0, weights_std=5e-3,
                 cost_function='binary_crossentropy', cost_args=None,
                 add_noise=True, noiseless_h1=True,
                 hidden_noise='gaussian', hidden_noise_level=2, input_noise='salt_and_pepper', input_noise_level=0.4,
                 noise_decay='exponential', noise_annealing=1,
                 image_width=None, image_height=None,
                 rnn_hidden_size=None, rnn_hidden_activation='rectifier',
                 rnn_weights_mean=0, rnn_weights_std=5e-3, rnn_weights_interval='montreal',
        Initialize an RNN-GSN.

        inputs_hook : Tuple of (shape, variable)
            Routing information for the model to accept inputs from elsewhere. This is used for linking
            different models together (e.g. setting the Softmax model's input layer to the DAE's hidden layer gives a
            newly supervised classification model). For now, it needs to include the shape information (normally the
            dimensionality of the input i.e. n_in).
        hiddens_hook : Tuple of (shape, variable)
            Routing information for the model to accept its hidden representation from elsewhere.
            This is used for linking different models together (e.g. setting the DAE model's hidden layers to the RNN's
            output layer gives a generative recurrent model.) For now, it needs to include the shape
            information (normally the dimensionality of the hiddens i.e. n_hidden).
        params_hook : List(theano shared variable)
            A list of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as a training model with dropout applied
            to layers and one without for testing, where the parameters are shared between the two.
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        input_size : int
            The size (dimensionality) of the input to the DAE. If shape is provided in `inputs_hook`, this is optional.
            The :class:`Model` requires an `output_size`, which gets set to this value because the DAE is an
            unsupervised model. The output is a reconstruction of the input.
        hidden_size : int
            The size (dimensionality) of the hidden layer for the DAE. Generally, you want it to be larger than
            `input_size`, which is known as *overcomplete*.
        layers : int
            The number of hidden layers to use.
        walkbacks : int
            The number of walkbacks to perform (the variable K in Bengio's paper above). A walkback is a Gibbs sample
            from the DAE, which means the model generates inputs in sequence, where each generated input is compared
            to the original input to create the reconstruction cost for training. For running the model, the very last
            generated input in the Gibbs chain is used as the output.
        visible_activation : str or callable
            The nonlinear (or linear) visible activation to perform after the dot product from hiddens -> visible layer.
            This activation function should be appropriate for the input unit types, i.e. 'sigmoid' for binary inputs.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        hidden_activation : str or callable
            The nonlinear (or linear) hidden activation to perform after the dot product from visible -> hiddens layer.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        input_sampling : bool
            During walkbacks, whether to sample from the generated input to create a new starting point for the next
            walkback (next step in the Gibbs chain). This generally makes walkbacks more effective by making the
            process more stochastic - more likely to find spurious modes in the model's representation.
        mrg : random
            A random number generator that is used when adding noise into the network and for sampling from the input.
            I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.
        tied_weights : bool
            DAE has two weight matrices - W from input -> hiddens and V from hiddens -> input. This boolean
            determines if V = W.T, which 'ties' V to W and reduces the number of parameters necessary during training.
        weights_init : str
            Determines the method for initializing model weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        cost_function : str or callable
            The function to use when calculating the reconstruction cost of the model. This should be appropriate
            for the type of input, i.e. use 'binary_crossentropy' for binary inputs, or 'mse' for real-valued inputs.
            See opendeep.utils.cost for options. You can also specify your own function, which needs to be callable.
        cost_args : dict
            Any additional named keyword arguments to pass to the specified `cost_function`.
        add_noise : bool
            Whether to add noise to (corrupt) the input before passing it through the computation graph during training.
            This should most likely be set to the default of True, because this is a *denoising* autoencoder after all.
        noiseless_h1 : bool
            If True, do not add noise to (corrupt) the hidden layer during computation.
        hidden_noise : str
            What type of noise to use for corrupting the hidden layer (if not `noiseless_h1`). See opendeep.utils.noise
            for options. This should be appropriate for the hidden unit activation, i.e. Gaussian for tanh or other
            real-valued activations, etc.
        hidden_noise_level : float
            The amount of noise to use for the noise function specified by `hidden_noise`. This could be the
            standard deviation for gaussian noise, the interval for uniform noise, the dropout amount, etc.
        input_noise : str
            What type of noise to use for corrupting the input before computation (if `add_noise`).
            See opendeep.utils.noise for options. This should be appropriate for the input units, i.e. salt-and-pepper
            for binary units, etc.
        input_noise_level : float
            The amount of noise used to corrupt the input. This could be the masking probability for salt-and-pepper,
            standard deviation for Gaussian, interval for Uniform, etc.
        noise_decay : str or False
            Whether to use `input_noise` scheduling (decay `input_noise_level` during the course of training),
            and if so, the string input specifies what type of decay to use. See opendeep.utils.decay for options.
            Noise decay (known as noise scheduling) effectively helps the DAE learn larger variance features first,
            and then smaller ones later (almost as a kind of curriculum learning). May help it converge faster.
        noise_annealing : float
            The amount to reduce the `input_noise_level` after each training epoch based on the decay function specified
            in `noise_decay`.
        image_width : int
            If the input should be represented as an image, the width of the input image. If not specified, it will be
            close to the square factor of the `input_size`.
        image_height : int
            If the input should be represented as an image, the height of the input image. If not specified, it will be
            close to the square factor of the `input_size`.
        rnn_hidden_size : int
            The number of hidden units (dimensionality) to use in the recurrent layer.
        rnn_hidden_activation : str or Callable
            The activation function to apply to recurrent units. See opendeep.utils.activation for options.
        rnn_weights_init : str
            Determines the method for initializing recurrent weights. See opendeep.utils.nnet for options. 'Identity'
            works well with 'rectifier' `rnn_hidden_activation`.
        rnn_weights_mean : float
            If Gaussian `rnn_weights_init`, the mean value to use.
        rnn_weights_std : float
            If Gaussian `rnn_weights_init`, the standard deviation to use.
        rnn_weights_interval : str or float
            If Uniform `rnn_weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        rnn_bias_init : float
            The initial value to use for the recurrent bias parameter. Most often, the default of 0.0 is preferred.
        generate_n_steps : int
            When generating from the model, how many steps to generate.
        initial_parameters = locals().copy()
        super(RNN_GSN, self).__init__(**initial_parameters)

        # specifications #
        self.input_size = input_size
        self.layers = layers
        self.walkbacks = walkbacks
        self.input_sampling = input_sampling
        self.mrg = mrg
        self.tied_weights = tied_weights
        self.noise_decay = noise_decay
        self.noise_annealing = noise_annealing
        self.add_noise = add_noise
        self.noiseless_h1 = noiseless_h1
        self.hidden_noise = hidden_noise
        self.hidden_noise_level = hidden_noise_level
        self.input_noise = input_noise
        self.input_noise_level = input_noise_level
        self.image_width = image_width
        self.image_height = image_height

        # grab info from the inputs_hook, hiddens_hook, or from parameters
        if self.inputs_hook is not None:  # inputs_hook is a tuple of (Shape, Input)
            raise NotImplementedError("Inputs_hook not implemented yet for RNN-GSN")
            # make the input a symbolic matrix - a sequence of inputs
            self.input = T.matrix('Xs')

        # set an initial value for the recurrent hiddens
        self.u0 = T.zeros((rnn_hidden_size,))

        # make a symbolic vector for the initial recurrent hiddens value to use during generation for the model
        self.generate_u0 = T.vector("generate_u0")

        # either grab the hidden's desired size from the parameter directly, or copy n_in
        self.hidden_size = hidden_size or self.input_size

        # deal with hiddens_hook
        if self.hiddens_hook is not None:
            raise NotImplementedError("Hiddens_hook not implemented yet for RNN-GSN")

        # other specifications
        # visible activation function!
        self.visible_activation_func = get_activation_function(visible_activation)

        # make sure the sampling functions are appropriate for the activation functions.
        if is_binary(self.visible_activation_func):
            self.visible_sampling = mrg.binomial
            # TODO: implement non-binary activation
            log.error("Non-binary visible activation not supported yet!")
            raise NotImplementedError("Non-binary visible activation not supported yet!")

        # hidden activation function!
        self.hidden_activation_func = get_activation_function(hidden_activation)

        # recurrent hidden activation function!
        self.rnn_hidden_activation_func = get_activation_function(rnn_hidden_activation)

        # Cost function
        self.cost_function = get_cost_function(cost_function)
        self.cost_args = cost_args

        # symbolic scalar for how many recurrent steps to use during generation from the model
        self.n_steps = T.iscalar("generate_n_steps")

        # determine the sizes of each layer in a list.
        # layer sizes, from h0 to hK (h0 is the visible layer)
        self.layer_sizes = [self.input_size] + [self.hidden_size] * self.layers

        # parameters - make sure to deal with params_hook! #
        if self.params_hook is not None:
            # if tied weights, expect (layers*2 + 1) params for GSN and (int(layers+1)/int(2) + 3) for RNN
            if self.tied_weights:
                expected_num = (2*self.layers + 1) + (int(self.layers+1)/2 + 3)
                assert len(self.params_hook) == expected_num, \
                    "Tied weights: expected {0!s} params, found {1!s}!".format(expected_num, len(self.params_hook))
                gsn_len = (2*self.layers + 1)
                self.weights_list = self.params_hook[:self.layers]
                self.bias_list = self.params_hook[self.layers:gsn_len]

            # if untied weights, expect layers*3 + 1 params
                expected_num = (3*self.layers + 1) + (int(self.layers + 1)/2 + 3)
                assert len(self.params_hook) == expected_num, \
                    "Untied weights: expected {0!s} params, found {1!s}!".format(expected_num, len(self.params_hook))
                gsn_len = (3*self.layers + 1)
                self.weights_list = self.params_hook[:2*self.layers]
                self.bias_list = self.params_hook[2*self.layers:gsn_len]

            rnn_len = gsn_len + int(self.layers + 1) / 2
            self.recurrent_to_gsn_weights_list = self.params_hook[gsn_len:rnn_len]
            self.W_u_u = self.params_hook[rnn_len:rnn_len + 1]
            self.W_x_u = self.params_hook[rnn_len + 1:rnn_len + 2]
            self.recurrent_bias = self.params_hook[rnn_len + 2:rnn_len + 3]

        # otherwise, construct our params
            # initialize a list of weights and biases based on layer_sizes for the GSN
            self.weights_list = [get_weights(weights_init=weights_init,
                                             shape=(self.layer_sizes[i], self.layer_sizes[i + 1]),
                                             name="W_{0!s}_{1!s}".format(i, i + 1),
                                             # if gaussian
                                             # if uniform
                                 for i in range(self.layers)]
            # add more weights if we aren't tying weights between layers (need to add for higher-lower layers now)
            if not self.tied_weights:
                                 shape=(self.layer_sizes[i + 1], self.layer_sizes[i]),
                                 name="W_{0!s}_{1!s}".format(i + 1, i),
                                 # if gaussian
                                 # if uniform
                     for i in reversed(range(self.layers))]
            # initialize each layer bias to 0's.
            self.bias_list = [get_bias(shape=(self.layer_sizes[i],),
                                       name='b_' + str(i),
                              for i in range(self.layers + 1)]

            self.recurrent_to_gsn_weights_list = [
                            shape=(rnn_hidden_size, self.layer_sizes[layer]),
                            # if gaussian
                            # if uniform
                for layer in range(self.layers + 1) if layer % 2 != 0
            self.W_u_u = get_weights(weights_init=rnn_weights_init,
                                     shape=(rnn_hidden_size, rnn_hidden_size),
                                     # if gaussian
                                     #if uniform
            self.W_x_u = get_weights(weights_init=rnn_weights_init,
                                     shape=(self.input_size, rnn_hidden_size),
                                     # if gaussian
                                     # if uniform
            self.recurrent_bias = get_bias(shape=(rnn_hidden_size,),

        # build the params of the model into a list
        self.gsn_params = self.weights_list + self.bias_list
        self.params = self.gsn_params + \
                      self.recurrent_to_gsn_weights_list + \
                      [self.W_u_u, self.W_x_u, self.recurrent_bias]
        log.debug("rnn-gsn params: %s", str(self.params))

        # Create the RNN-GSN graph!
        self.x_sample, self.cost, self.monitors, self.updates_train, self.x_ts, self.updates_generate, self.u_t = \

        log.info("Initialized an RNN-GSN!")
예제 #33
    def __init__(self, inputs_hook=None, params_hook=None, outdir='outputs/conv2d',
                 input_size=None, filter_shape=None, strides=(1, 1), border_mode='valid',
                 weights_init='uniform', weights_interval='montreal', weights_mean=0, weights_std=5e-3,
        # Constructor for the Conv2D layer: picks the input variable, the activation and convolution
        # callables, the parameters W and b, and builds `self.output` for the chosen border mode.
        # NOTE(review): the signature is cut off in this listing. The closing `):` is missing, and so are
        # the parameters that the body reads later (`bias_init`, `activation`, `convolution`) and the
        # documented `mrg`. The quotes delimiting the docstring text below are missing as well.
        # NOTE(review): the text below documents `stride : int`, but the visible parameter is
        # `strides=(1, 1)` — confirm which one is intended.
        Initialize a 2-dimensional convolutional layer.

        inputs_hook : Tuple of (shape, variable)
            Routing information for the model to accept inputs from elsewhere. This is used for linking
            different models together. For now, it needs to include the shape information.
        params_hook : List(theano shared variable)
            A list of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables).
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        input_size : tuple
            Shape of the incoming data: (batch_size, num_channels, input_height, input_width).
            If input_size is None, it can be inferred. However, border_mode can't be 'same'.
        filter_shape : tuple
            (num_filters, num_channels, filter_height, filter_width). This is also the shape of the weights matrix.
        stride : int
            The distance between the receptive field centers of neighboring units. This is the 'stride' of the
            convolution operation.
        border_mode : str, one of 'valid', 'full', 'same'
            A string indicating the convolution border mode.
            If 'valid', the convolution is only computed where the input and the
            filter fully overlap.
            If 'full', the convolution is computed wherever the input and the
            filter overlap by at least one position.
            If 'same', the convolution is computed wherever the input and the
            filter overlap by at least half the filter size, when the filter size
            is odd. In practice, the input is zero-padded with half the filter size
            at the beginning and half at the end (or one less than half in the case
            of an even filter size). This results in an output length that is the
            same as the input length (for both odd and even filter sizes).
        weights_init : str
            Determines the method for initializing model weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        activation : str or Callable
            The activation function to apply to the layer. See opendeep.utils.activation for options.
        convolution : str or Callable
            The 2-dimensional convolution implementation to use. The default of 'conv2d' is normally fine because it
            uses theano's tensor.nnet.conv.conv2d, which cherry-picks the best implementation with a meta-optimizer if
            you set the theano configuration flag 'optimizer_including=conv_meta'. Otherwise, you could pass a
            callable function, such as cudnn or cuda-convnet if you don't want to use the meta-optimizer.
        mrg : random
            A random number generator that is used when adding noise.
            I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.

        Theano's default convolution function (`theano.tensor.nnet.conv.conv2d`)
        does not support the 'same' border mode by default. This layer emulates
        it by performing a 'full' convolution and then cropping the result, which
        may negatively affect performance.
        # Forward the current locals (the constructor arguments at this point), minus `self`, to the
        # superclass constructor as keyword arguments.
        # NOTE(review): `arg is not 'self'` is an identity test against a string literal; `!=` is the
        # reliable comparison — confirm and fix in code.
        super(Conv2D, self).__init__(**{arg: val for (arg, val) in locals().items() if arg is not 'self'})

        # specifications #
        # grab info from the inputs_hook, or from parameters
        # expect input to be in the form (B, C, 0, 1) (batch, channel, rows, cols)
        # inputs_hook is a tuple of (Shape, Input)
        # (`self.inputs_hook` is presumably populated by the superclass constructor — confirm.)
        if self.inputs_hook:
            # make sure inputs_hook is a tuple
            assert len(self.inputs_hook) == 2, "expecting inputs_hook to be tuple of (shape, input)"
            self.input = inputs_hook[1]
            # NOTE(review): as listed, the next assignment also runs when a hook was supplied and replaces
            # the hooked input; an `else:` presumably belongs before it — confirm.
            # make the input a symbolic 4-D tensor (T.ftensor4)
            self.input = T.ftensor4('X')

        # activation function!
        activation_func = get_activation_function(activation)

        # convolution function!
        if convolution == 'conv2d':
            # using the theano flag optimizer_including=conv_meta will let this conv function optimize itself.
            convolution_func = T.nnet.conv2d
            # NOTE(review): as listed, the assert and reassignment below run inside the 'conv2d' branch,
            # where `convolution` is a string and therefore not callable; an `else:` presumably belongs
            # before them — confirm.
            assert callable(convolution), "Input convolution was not 'conv2d' and was not Callable."
            convolution_func = convolution

        # filter shape should be in the form (num_filters, num_channels, filter_size[0], filter_size[1])
        num_filters = filter_shape[0]
        # NOTE(review): the slice [2:3] has at most one element, yet `filter_size[1]` is read in the
        # 'same' branch below; [2:4] looks intended — confirm.
        filter_size = filter_shape[2:3]

        # Params - make sure to deal with params_hook! #
        if self.params_hook:
            # make sure the params_hook has W and b
            assert len(self.params_hook) == 2, \
                "Expected 2 params (W and b) for Conv2D, found {0!s}!".format(len(self.params_hook))
            W, b = self.params_hook
            # NOTE(review): an `else:` presumably belongs here, and the `get_weights(` call below is cut
            # off (remaining arguments and the closing parenthesis are not shown in this listing).
            W = get_weights(weights_init=weights_init,
                            # if gaussian
                            # if uniform

            b = get_bias(shape=(num_filters, ), name="b", init_values=bias_init)

        # Finally have the two parameters!
        self.params = [W, b]

        # Computational Graph! #
        if border_mode in ['valid', 'full']:
            # NOTE(review): both `convolution_func(` calls in this section are cut off after the first
            # argument in this listing.
            conved = convolution_func(self.input,
        elif border_mode == 'same':
            assert self.input_size is not None, "input_size has to be specified for border_mode 'same'!"
            conved = convolution_func(self.input,
            # crop the convolution result back to the input's spatial extent (input_size[2], input_size[3]),
            # starting at an offset of (filter size - 1) // 2 along each spatial axis
            shift_x = (filter_size[0] - 1) // 2
            shift_y = (filter_size[1] - 1) // 2
            conved = conved[:, :, shift_x:self.input_size[2] + shift_x,
                            shift_y:self.input_size[3] + shift_y]
            # NOTE(review): as listed, this raise sits inside the 'same' branch; it presumably belongs
            # under a final `else:` for unrecognized border modes — confirm.
            raise RuntimeError("Invalid border mode: '%s'" % border_mode)

        # add the bias along axis 1 (dimshuffle makes it broadcast over the other three axes),
        # then apply the activation
        self.output = activation_func(conved + b.dimshuffle('x', 0, 'x', 'x'))
예제 #34
    def __init__(self, inputs_hook=None, hiddens_hook=None, params_hook=None,
                 input_size=28*28, hidden_size=1000, noise_level=0.4,
                 hidden_activation='tanh', visible_activation='sigmoid', cost_function='binary_crossentropy'):
        """
        Build the symbolic graph for a tied-weight denoising autoencoder.

        inputs_hook : tuple of (size, variable), optional
            If given, element 0 is used as the input size and element 1 replaces the
            freshly created symbolic input matrix.
        hiddens_hook : tuple of (size, variable), optional
            If given, element 0 is used as the hidden size and element 1 is used as the
            hidden representation for the run (generation) function instead of encoding `x`.
        params_hook : sequence of 3 shared variables, optional
            If given, unpacked as (W, b0, b1) instead of initializing new parameters.
        input_size : int
            Size of the visible layer (used for the shapes of W and b0).
        hidden_size : int
            Size of the hidden layer (used for the shapes of W and b1).
        noise_level : float
            Passed to `salt_and_pepper` to corrupt the input during training.
        hidden_activation, visible_activation : str or callable
            Resolved through `get_activation_function`.
        cost_function : str or callable
            Resolved through `get_cost_function`.

        Sets `self.inputs`, `self.params`, `self.train_cost`, `self.hiddens`,
        `self.recon_predict` and `self.f_run`.

        Raises AssertionError if a supplied hook does not have the expected length.
        """
        # initialize the Model superclass with the constructor arguments.
        # `.items()` works on both Python 2 and 3 (`.iteritems()` is Python 2 only), and names are
        # compared with `not in` rather than identity. On Python 3, referencing `super` adds an implicit
        # `__class__` entry to locals(), so it is filtered out along with `self`.
        super(DenoisingAutoencoder, self).__init__(
            **{arg: val for (arg, val) in locals().items() if arg not in ('self', '__class__')}
        )

        # Define model hyperparameters
        # deal with the inputs_hook and hiddens_hook for the size parameters!
        # if the hook exists, grab the size from the first element of the tuple.
        if self.inputs_hook is not None:
            assert len(self.inputs_hook) == 2, "Was expecting inputs_hook to be a tuple."
            self.input_size = inputs_hook[0]

        if self.hiddens_hook is not None:
            assert len(self.hiddens_hook) == 2, "was expecting hiddens_hook to be a tuple."
            hidden_size = hiddens_hook[0]

        # use the helper methods to grab appropriate activation functions from names!
        hidden_activation = get_activation_function(hidden_activation)
        visible_activation = get_activation_function(visible_activation)

        # do the same for the cost function
        cost_function = get_cost_function(cost_function)

        # Now, define the symbolic input to the model (Theano)
        # We use a matrix rather than a vector so that minibatch processing can be done in parallel.
        # Make sure to deal with 'inputs_hook' if it exists!
        if self.inputs_hook is not None:
            # grab the new input variable from the inputs_hook tuple
            x = self.inputs_hook[1]
        else:
            # no hook: create our own symbolic input
            x = T.matrix("X")
        self.inputs = [x]

        # Build the model's parameters - a weight matrix and two bias vectors
        # Make sure to deal with 'params_hook' if it exists!
        if self.params_hook:
            # check to see if it contains the three necessary variables
            assert len(self.params_hook) == 3, "Not correct number of params to DAE, needs 3!"
            W, b0, b1 = self.params_hook
        else:
            # no hook: initialize fresh parameters (previously these overwrote the hooked ones)
            W = get_weights_uniform(shape=(self.input_size, hidden_size), name="W")
            b0 = get_bias(shape=self.input_size, name="b0")
            b1 = get_bias(shape=hidden_size, name="b1")
        self.params = [W, b0, b1]

        # Perform the computation for a denoising autoencoder!
        # first, add noise (corrupt) the input
        corrupted_input = salt_and_pepper(input=x, noise_level=noise_level)
        # next, run the hidden layer given the inputs (the encoding function)
        # We don't need to worry about hiddens_hook during training, because we can't
        # run a cost without having the input!
        # hiddens_hook is more for the run function and linking methods below.
        hiddens = hidden_activation(T.dot(corrupted_input, W) + b1)
        # finally, create the reconstruction from the hidden layer (we tie the weights with W.T)
        reconstruction = visible_activation(T.dot(hiddens, W.T) + b0)
        # the training cost is reconstruction error
        self.train_cost = cost_function(output=reconstruction, target=x)

        # Compile everything into a Theano function for prediction!
        # When using real-world data in predictions, we wouldn't corrupt the input first.
        # Therefore, create another version of the hiddens and reconstruction without adding the noise.
        # Here is where we would handle hiddens_hook because this is a generative model!
        # For the run function, it would take in the hiddens instead of the input variable x.
        uses_hiddens_hook = self.hiddens_hook is not None
        if uses_hiddens_hook:
            self.hiddens = self.hiddens_hook[1]
        else:
            self.hiddens = hidden_activation(T.dot(x, W) + b1)
        # make the reconstruction (generated) from the hiddens
        self.recon_predict = visible_activation(T.dot(self.hiddens, W.T) + b0)
        # now compile the run function accordingly - if it used x or hiddens as the input.
        if uses_hiddens_hook:
            self.f_run = function(inputs=[self.hiddens], outputs=self.recon_predict)
        else:
            self.f_run = function(inputs=[x], outputs=self.recon_predict)
예제 #35
    def __init__(self, inputs_hook=None, params_hook=None, outdir='outputs/convpool',
                 input_size=None, filter_shape=None, convstride=4, padsize=0, group=1,
                 poolsize=3, poolstride=2,
                 weights_init='gaussian', weights_interval='montreal', weights_mean=0, weights_std=.01,
        # Constructor for the ConvPoolLayer: picks the input variable, stores the layer configuration,
        # obtains the parameters for `group` 1 or 2, and builds `self.output` via
        # `self._build_computation_graph()` (defined elsewhere).
        # NOTE(review): the signature is cut off in this listing. The closing `):` is missing, and so are
        # the parameters that the body reads later (`bias_init`, `activation`, `convolution`,
        # `local_response_normalization`) and the documented `mrg`. The quotes delimiting the docstring
        # text below are missing as well.
        Initialize a convpool layer.

        inputs_hook : Tuple of (shape, variable)
            Routing information for the model to accept inputs from elsewhere. This is used for linking
            different models together. For now, it needs to include the shape information.
        params_hook : List(theano shared variable)
            A list of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables).
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        input_size : tuple
            Shape of the incoming data: (batch_size, num_channels, input_height, input_width).
        filter_shape : tuple
            (num_filters, num_channels, filter_height, filter_width). This is also the shape of the weights matrix.
        convstride : int
            The distance between the receptive field centers of neighboring units. This is the 'subsample' of theano's
            convolution operation.
        padsize : int
            This is the border_mode for theano's convolution operation.
        group : int
            Not yet supported, used for multi-gpu implementation.
            .. todo:: support multi-gpu
        poolsize : int
            How much to downsample the output.
        poolstride : int
            The stride width for downsampling the output.
        weights_init : str
            Determines the method for initializing model weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        activation : str or Callable
            The activation function to apply to the layer. See opendeep.utils.activation for options.
        convolution : str or Callable
            The 2-dimensional convolution implementation to use. The default of 'conv2d' is normally fine because it
            uses theano's tensor.nnet.conv.conv2d, which cherry-picks the best implementation with a meta-optimizer if
            you set the theano configuration flag 'optimizer_including=conv_meta'. Otherwise, you could pass a
            callable function, such as cudnn or cuda-convnet if you don't want to use the meta-optimizer.
        mrg : random
            A random number generator that is used when adding noise.
            I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.
        # Forward the current locals (the constructor arguments at this point), minus `self`, to the
        # superclass constructor as keyword arguments.
        # NOTE(review): `arg is not 'self'` is an identity test against a string literal; `!=` is the
        # reliable comparison — confirm and fix in code.
        super(ConvPoolLayer, self).__init__(**{arg: val for (arg, val) in locals().items() if arg is not 'self'})

        # deal with the inputs coming from inputs_hook - necessary for now to give an input hook
        # inputs_hook is a tuple of (Shape, Input)
        if self.inputs_hook:
            assert len(self.inputs_hook) == 2, "expecting inputs_hook to be tuple of (shape, input)"
            self.input = inputs_hook[1]
            # NOTE(review): as listed, the next assignment also runs when a hook was supplied and replaces
            # the hooked input; an `else:` presumably belongs before it — confirm.
            self.input = T.ftensor4("X")

        self.group = group

        # layer configuration #
        # activation function!
        self.activation_func = get_activation_function(activation)

        # convolution function!
        if convolution == 'conv2d':
            # using the theano flag optimizer_including=conv_meta will let this conv function optimize itself.
            self.convolution_func = T.nnet.conv2d
            # NOTE(review): as listed, the assert and reassignment below run inside the 'conv2d' branch,
            # where `convolution` is a string and therefore not callable; an `else:` presumably belongs
            # before them — confirm.
            assert callable(convolution), "Input convolution was not 'conv2d' and was not Callable."
            self.convolution_func = convolution

        # expect image_shape to be bc01!
        # NOTE(review): indexing requires `self.input_size` to be set; the visible signature defaults
        # `input_size` to None. Presumably the superclass fills it in from the hook — confirm.
        self.channel = self.input_size[1]

        self.convstride = convstride
        self.padsize = padsize

        self.poolstride = poolstride
        self.poolsize = poolsize

        # if lib_conv is cudnn, it works only on square images and the grad works only when channel % 16 == 0

        assert self.group in [1, 2], "group argument needs to be 1 or 2 (1 for default conv2d)"

        # convert both shapes to numpy arrays; the group == 2 code below assigns to their elements in place
        filter_shape = numpy.asarray(filter_shape)
        self.input_size = numpy.asarray(self.input_size)

        if local_response_normalization:
            lrn_func = cross_channel_normalization_bc01
            # NOTE(review): as listed, `lrn_func` is always reset to None on the next line, so the
            # normalization near the end of this method can never run; an `else:` presumably belongs
            # before it — confirm.
            lrn_func = None

        # Params - make sure to deal with params_hook! #
        if self.group == 1:
            if self.params_hook:
                # make sure the params_hook has W and b
                assert len(self.params_hook) == 2, \
                    "Expected 2 params (W and b) for ConvPoolLayer, found {0!s}!".format(len(self.params_hook))
                self.W, self.b = self.params_hook
                # NOTE(review): an `else:` presumably belongs here, and the `get_weights(` call below is
                # cut off (remaining arguments and the closing parenthesis are not shown in this listing).
                self.W = get_weights(weights_init=weights_init,
                                     # if gaussian
                                     # if uniform

                self.b = get_bias(shape=filter_shape[0], init_values=bias_init, name="b")

            self.params = [self.W, self.b]

            # NOTE(review): everything from here to `self.params = [self.W0, ...]` uses four parameters
            # (W0, W1, b0, b1) and halves the first two entries of both shapes; it presumably belongs to
            # an `else:` branch for group == 2, whose header is not shown in this listing — confirm.
            filter_shape[0] = filter_shape[0] / 2
            filter_shape[1] = filter_shape[1] / 2

            self.input_size[0] = self.input_size[0] / 2
            self.input_size[1] = self.input_size[1] / 2
            if self.params_hook:
                assert len(self.params_hook) == 4, "expected params_hook to have 4 params"
                self.W0, self.W1, self.b0, self.b1 = self.params_hook
                # NOTE(review): as listed, the four assignments below overwrite the hooked parameters;
                # an `else:` presumably belongs before them — confirm.
                self.W0 = get_weights_gaussian(shape=filter_shape, name="W0")
                self.W1 = get_weights_gaussian(shape=filter_shape, name="W1")
                self.b0 = get_bias(shape=filter_shape[0], init_values=bias_init, name="b0")
                self.b1 = get_bias(shape=filter_shape[0], init_values=bias_init, name="b1")
            self.params = [self.W0, self.b0, self.W1, self.b1]

        # build appropriate graph for conv. version #
        self.output = self._build_computation_graph()

        # Local Response Normalization (for AlexNet)
        if local_response_normalization and lrn_func is not None:
            self.output = lrn_func(self.output)

        log.debug("convpool layer initialized with shape_in: %s", str(self.input_size))
예제 #36
    def __init__(self,
        # Constructor for the LSTM: normalizes the input to a 3-D tensor, resolves the hidden size,
        # collects the weight/bias parameters, and runs `scan` over `self.recurrent_step`
        # (once, or twice when `direction` is 'bidirectional').
        # NOTE(review): the rest of the signature is not shown in this listing. The body reads
        # `direction`, `activation`, `gate_activation` and `clip_recurrent_grads`, and the text below
        # documents the remaining parameters. The quotes delimiting the docstring text are missing too,
        # and two of its entries (`params`, `clip_recurrent_grads`) stop mid-sentence.
        Initialize an LSTM.

        inputs : List of [tuple(shape, `Theano.TensorType`)]
            The dimensionality of the inputs for this model, and the routing information for the model
            to accept inputs from elsewhere. `inputs` variable are expected to be of the form (timesteps, batch, data).
            `shape` will be a monad tuple representing known
            sizes for each dimension in the `Theano.TensorType`. The length of `shape` should be equal to number of
            dimensions in `Theano.TensorType`, where the shape element is an integer representing the size for its
            dimension, or None if the shape isn't known. For example, if you have a matrix with unknown batch size
            but fixed feature size of 784, `shape` would be: (None, 784). The full form of `inputs` would be:
            [((None, 784), <TensorType(float32, matrix)>)].
        hiddens : int or Tuple of (shape, `Theano.TensorType`)
            Int for the number of hidden units to use, or a tuple of shape, expression to route the starting
            hidden values from elsewhere.
        params : Dict(string_name: theano SharedVariable), optional
            A dictionary of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as siamese networks or pretraining some
        outdir : str
            The location to produce outputs from training or running the :class:`LSTM`. If None, nothing will be saved.
        activation : str or callable
            The nonlinear (or linear) activation to perform for the hidden units.
            This activation function should be appropriate for the output unit types, i.e. 'sigmoid' for binary.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        gate_activation : str or callable
            The activation to perform for the hidden gates (default sigmoid).
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        mrg : random
            A random number generator that is used when adding noise.
            I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.
        weights_init : str
            Determines the method for initializing input-hidden model weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        r_weights_init : str
            Determines the method for initializing recurrent hidden-hidden model weights.
            See opendeep.utils.nnet for options.
        r_weights_interval : str or float
            If Uniform `r_weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        r_weights_mean : float
            If Gaussian `r_weights_init`, the mean value to use.
        r_weights_std : float
            If Gaussian `r_weights_init`, the standard deviation to use.
        r_bias_init : float
            The initial value to use for the recurrent bias parameter. Most often, the default of 0.0 is preferred.
        direction : str
            The direction this recurrent model should go over its inputs. Can be 'forward', 'backward', or
            'bidirectional'. In the case of 'bidirectional', it will make two passes over the sequence,
            computing two sets of hiddens and adding them together.
        clip_recurrent_grads : False or float, optional
            Whether to clip the gradients for the parameters that unroll over timesteps (such as the weights
            connecting previous hidden states to the current hidden state, and not the weights from current
            input to hiddens). If it is a float, the gradients for the weights will be hard clipped to the range
        # Snapshot the constructor arguments and hand them to the superclass constructor.
        # NOTE(review): the copied locals still contain the `self` entry, which would collide with the
        # bound `self` in the call below; a step removing it presumably existed here — confirm.
        initial_parameters = locals().copy()
        super(LSTM, self).__init__(**initial_parameters)

        # specifications #
        backward = direction.lower() == 'backward'
        bidirectional = direction.lower() == 'bidirectional'

        # activation functions #
        # recurrent hidden activation functions!
        self.hidden_activation_func = get_activation_function(activation)
        self.gate_activation_func = get_activation_function(gate_activation)

        # inputs #
        # inputs are expected to have the shape (n_timesteps, batch_size, data)
        if len(self.inputs) > 1:
            raise NotImplementedError(
                "Expected 1 input, found %d. Please merge inputs before passing "
                "to the model!" % len(self.inputs))
        # self.inputs is a list of all the input expressions (we enforce only 1, so self.inputs[0] is the input)
        input_shape, self.input = self.inputs[0]
        if isinstance(input_shape, int):
            # an int shape is taken as the size of the last dimension; the leading ones are unknown (None)
            self.input_size = ((None, ) *
                               (self.input.ndim - 1)) + (input_shape, )
            # NOTE(review): as listed, the next assignment overwrites the tuple just built; an `else:`
            # presumably belongs before it — confirm.
            self.input_size = input_shape
        assert self.input_size is not None, "Need to specify the shape for at least the last dimension of the input!"
        # input is 3D tensor of (timesteps, batch_size, data_dim)
        # if input is 2D tensor, assume it is of the form (timesteps, data_dim) i.e. batch_size is 1. Convert to 3D.
        # if input is > 3D tensor, assume it is of form (timesteps, batch_size, data...) and flatten to 3D.
        if self.input.ndim == 1:
            # 1-D input: append two axes, then unbroadcast them (axes 1 and 2)
            self.input = unbroadcast(self.input.dimshuffle(0, 'x', 'x'),
                                     [1, 2])

        elif self.input.ndim == 2:
            # 2-D input: insert an axis at position 1, then unbroadcast it
            self.input = unbroadcast(self.input.dimshuffle(0, 'x', 1), 1)

        elif self.input.ndim > 3:
            self.input = self.input.flatten(3)
            # NOTE(review): `(prod(...))` is a parenthesized value, not a one-element tuple, so adding it
            # to the sliced tuple presumably fails; a trailing comma looks intended — confirm.
            self.input_size = self.input_size[:2] + (prod(self.input_size[2:]))

        # hiddens #
        # have only 1 hiddens
        # NOTE(review): the assert below is cut off (its final `len(` call is never closed in this listing).
        assert len(
            self.hiddens) == 1, "Expected 1 `hiddens` param, found %d" % len(
        self.hiddens = self.hiddens[0]
        # if hiddens is an int (hidden size parameter, not routing info)
        h_init = None
        if isinstance(self.hiddens, int):
            self.hidden_size = self.hiddens
        elif isinstance(self.hiddens, tuple):
            hidden_shape, h_init = self.hiddens
            if isinstance(hidden_shape, int):
                self.hidden_size = hidden_shape
                # NOTE(review): as listed, the next line indexes an int; an `else:` presumably belongs
                # before it — confirm.
                self.hidden_size = hidden_shape[-1]
            # NOTE(review): as listed, this raise runs for every tuple `hiddens`; it presumably belongs
            # under a final `else:` for unsupported types — confirm.
            raise AssertionError(
                "Hiddens need to be an int or tuple of (shape, theano_expression), found %s"
                % type(self.hiddens))

        # output shape is going to be 3D with (timesteps, batch_size, hidden_size)
        self.output_size = (None, None, self.hidden_size)

        # parameters - make sure to deal with params dict input! #
        # NOTE(review): in the four list comprehensions below, the expression that wraps the listed
        # arguments (and the closing brackets) is not shown in this listing; only the parameter names,
        # shapes and the gate suffixes 'c', 'i', 'f', 'o' are visible.
        # all input-to-hidden weights
        W_c, W_i, W_f, W_o = [
                "W_%s" % sub,
                    shape=(self.input_size[-1], self.hidden_size),
                    name="W_%s" % sub,
                    # if gaussian
                    # if uniform
            for sub in ['c', 'i', 'f', 'o']
        # all hidden-to-hidden weights
        U_c, U_i, U_f, U_o = [
                "U_%s" % sub,
                    shape=(self.hidden_size, self.hidden_size),
                    name="U_%s" % sub,
                    # if gaussian
                    # if uniform
            for sub in ['c', 'i', 'f', 'o']
        # if bidirectional, make hidden-to-hidden weights again to go the opposite direction
        U_c_b, U_i_b, U_f_b, U_o_b = None, None, None, None
        if bidirectional:
            U_c_b, U_i_b, U_f_b, U_o_b = [
                    "U_%s_b" % sub,
                        shape=(self.hidden_size, self.hidden_size),
                        name="U_%s_b" % sub,
                        # if gaussian
                        # if uniform
                for sub in ['c', 'i', 'f', 'o']
        # biases
        b_c, b_i, b_f, b_o = [
                "b_%s" % sub,
                get_bias(shape=(self.hidden_size, ),
                         name="b_%s" % sub,
            for sub in ['c', 'i', 'f', 'o']
        # clip gradients if we are doing that
        recurrent_params = [U_c, U_i, U_f, U_o, U_c_b, U_i_b, U_f_b, U_o_b]
        if clip_recurrent_grads:
            # use the magnitude so the clip range is symmetric: [-clip, clip]
            clip = abs(clip_recurrent_grads)
            # wrap each recurrent weight with grad_clip; the backward-direction weights stay None
            # when the model is not bidirectional
            U_c, U_i, U_f, U_o, U_c_b, U_i_b, U_f_b, U_o_b = [
                grad_clip(param, -clip, clip) if param is not None else None
                for param in recurrent_params

        # put all the parameters into our dictionary
        # NOTE(review): the dict literal is never closed in this listing, and the statement that would
        # add the four backward-direction entries under `if bidirectional:` is not shown.
        self.params = {
            "W_c": W_c,
            "W_i": W_i,
            "W_f": W_f,
            "W_o": W_o,
            "U_c": U_c,
            "U_i": U_i,
            "U_f": U_f,
            "U_o": U_o,
            "b_c": b_c,
            "b_i": b_i,
            "b_f": b_f,
            "b_o": b_o,
        if bidirectional:
                "U_c_b": U_c_b,
                "U_i_b": U_i_b,
                "U_f_b": U_f_b,
                "U_o_b": U_o_b,

        # make h_init the right sized tensor
        if h_init is None:
            # zeros shaped like the first timestep's input projected through W_c
            h_init = zeros_like(dot(self.input[0], W_c))

        # the initial cell state is built the same way
        c_init = zeros_like(dot(self.input[0], W_c))

        # computation #
        # move some computation outside of scan to speed it up!
        x_c = dot(self.input, W_c) + b_c
        x_i = dot(self.input, W_i) + b_i
        x_f = dot(self.input, W_f) + b_f
        x_o = dot(self.input, W_o) + b_o

        # now do the recurrent stuff
        # NOTE(review): the start of the assignment target and the tail of the `scan(` call are not shown
        # in this listing; presumably the first element of the target is `self.hiddens` (it is updated
        # with `+=` below) — confirm.
         _), self.updates = scan(fn=self.recurrent_step,
                                 sequences=[x_c, x_i, x_f, x_o],
                                 outputs_info=[h_init, c_init],
                                 non_sequences=[U_c, U_i, U_f, U_o],

        # if bidirectional, do the same in reverse!
        if bidirectional:
            # NOTE(review): same truncation as above; presumably the first element of the target is
            # `hiddens_b`, which is read below — confirm.
             _), updates_b = scan(fn=self.recurrent_step,
                                  sequences=[x_c, x_i, x_f, x_o],
                                  outputs_info=[h_init, c_init],
                                  non_sequences=[U_c_b, U_i_b, U_f_b, U_o_b],
                                  go_backwards=not backward,
            # flip the hiddens to be the right direction
            hiddens_b = hiddens_b[::-1]
            # update stuff
            self.hiddens += hiddens_b

        log.info("Initialized an LSTM!")
예제 #37
    # Build a 1-D convolutional layer: resolve the input variable, look up the activation and
    # convolution functions, obtain the W/b parameters, and define the symbolic output.
    # NOTE(review): this listing looks truncated - the signature below has no closing `):`, the
    # docstring text has no quote delimiters, and `bias_init`, `activation`, `convolution` (used in
    # the body) and `mrg` (documented) are missing from the visible parameter list. Confirm against
    # the upstream file before relying on it.
    def __init__(self, inputs_hook=None, params_hook=None, outdir='outputs/conv1d',
                 input_size=None, filter_shape=None, stride=None, border_mode='valid',
                 weights_init='uniform', weights_interval='montreal', weights_mean=0, weights_std=5e-3,
        Initialize a 1-D convolutional layer.

        inputs_hook : Tuple of (shape, variable)
            Routing information for the model to accept inputs from elsewhere. This is used for linking
            different models together. For now, it needs to include the shape information.
        params_hook : List(theano shared variable)
            A list of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables).
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        input_size : tuple
            Shape of the incoming data: (batch_size, num_channels, data_dimensionality). Most likely, your channels
            will be 1. For example, batches of text will be of the form (N, 1, D) where N=examples in minibatch and
            D=dimensionality (chars, words, etc.)
        filter_shape : tuple
            (num_filters, num_channels, filter_length). This is also the shape of the weights matrix.
        stride : int
            The distance between the receptive field centers of neighboring units. This is the 'stride' of the
            convolution operation.
        border_mode : str, one of 'valid', 'full', 'same'
            A string indicating the convolution border mode.
            If 'valid', the convolution is only computed where the input and the
            filter fully overlap.
            If 'full', the convolution is computed wherever the input and the
            filter overlap by at least one position.
            If 'same', the convolution is computed wherever the input and the
            filter overlap by at least half the filter size, when the filter size
            is odd. In practice, the input is zero-padded with half the filter size
            at the beginning and half at the end (or one less than half in the case
            of an even filter size). This results in an output length that is the
            same as the input length (for both odd and even filter sizes).
        weights_init : str
            Determines the method for initializing model weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        activation : str or Callable
            The activation function to apply to the layer. See opendeep.utils.activation for options.
        convolution : str or Callable
            The 1-dimensional convolution implementation to use. The default of 'mc0' is normally fine. See
            opendeep.utils.conv1d_implementations for alternatives. (This is necessary because Theano only
            supports 2D convolutions at the moment).
        mrg : random
            A random number generator that is used when adding noise.
            I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.

        Theano's default convolution function (`theano.tensor.nnet.conv.conv2d`)
        does not support the 'same' border mode by default. This layer emulates
        it by performing a 'full' convolution and then cropping the result, which
        may negatively affect performance.
        # Forward every constructor argument except `self` to the parent initializer.
        # NOTE(review): `arg is not 'self'` tests identity against a string literal; `!=` is the
        # equality comparison. The identity form only works when both strings are the same object.
        super(Conv1D, self).__init__(**{arg: val for (arg, val) in locals().items() if arg is not 'self'})

        # specifications #
        # grab info from the inputs_hook, or from parameters
        # expect input to be in the form (B, C, I) (batch, channel, input data)
        # inputs_hook is a tuple of (Shape, Input)
        if self.inputs_hook is not None:
            # make sure inputs_hook is a tuple
            assert len(self.inputs_hook) == 2, "expecting inputs_hook to be tuple of (shape, input)"
            self.input = inputs_hook[1]
            # NOTE(review): as listed, the assignment below replaces the hooked input set just above;
            # an `else:` (no inputs_hook) seems to be missing between them - confirm.
            # make the input a symbolic matrix
            self.input = T.ftensor3('X')

        # activation function!
        activation_func = get_activation_function(activation)

        # convolution function!
        convolution_func = get_conv1d_function(convolution)

        # filter shape should be in the form (num_filters, num_channels, filter_length)
        num_filters = filter_shape[0]
        filter_length = filter_shape[2]

        # Params - make sure to deal with params_hook! #
        if self.params_hook:
            # make sure the params_hook has W and b
            assert len(self.params_hook) == 2, \
                "Expected 2 params (W and b) for Conv1D, found {0!s}!".format(len(self.params_hook))
            W, b = self.params_hook
            # NOTE(review): as listed, W and b taken from the hook are immediately rebuilt below, and
            # the get_weights(...) call is cut off; an `else:` and the remaining arguments seem to be
            # missing - confirm.
            W = get_weights(weights_init=weights_init,
                            # if gaussian
                            # if uniform

            b = get_bias(shape=(num_filters,), name="b", init_values=bias_init)

        # Finally have the two parameters!
        self.params = [W, b]

        # Computational Graph! #
        # NOTE(review): both convolution_func(...) calls below are cut off after their first argument.
        if border_mode in ['valid', 'full']:
            conved = convolution_func(self.input,
        elif border_mode == 'same':
            conved = convolution_func(self.input,
            # Crop the result back to the input length, dropping `shift` leading positions.
            # presumably self.input_size is set by the parent initializer - it is not assigned here.
            shift = (filter_length - 1) // 2
            conved = conved[:, :, shift:self.input_size[2] + shift]

            # NOTE(review): presumably these two lines belonged to a final `else:` that is missing here.
            log.error("Invalid border mode: '%s'" % border_mode)
            raise RuntimeError("Invalid border mode: '%s'" % border_mode)

        # Add the per-filter bias (broadcast over the first and last axes) and apply the activation.
        self.output = activation_func(conved + b.dimshuffle('x', 0, 'x'))
예제 #38
    # Build an LSTM model graph: resolve activation/cost/noise functions, normalise the input to a
    # time-major 3D tensor, obtain or create the gate parameters, run the recurrence with scan
    # (optionally also in reverse), then define the output and the cost against the target.
    # NOTE(review): this listing looks truncated - the signature has no closing `):`, the docstring
    # text has no quote delimiters, and `mrg`, `direction` and `clip_recurrent_grads` (used in the
    # body) plus `bias_init` / `r_bias_init` (documented) are missing from the visible parameter
    # list. The docstring text also describes "a simple recurrent network" / RNN while the parent
    # call below names LSTM. Confirm against the upstream file.
    def __init__(self, inputs_hook=None, hiddens_hook=None, params_hook=None, outdir='outputs/lstm/',
                 input_size=None, hidden_size=None, output_size=None,
                 activation='sigmoid', hidden_activation='relu', inner_hidden_activation='sigmoid',
                 weights_init='uniform', weights_interval='montreal', weights_mean=0, weights_std=5e-3,
                 r_weights_init='identity', r_weights_interval='montreal', r_weights_mean=0, r_weights_std=5e-3,
                 cost_function='mse', cost_args=None,
                 noise='dropout', noise_level=None, noise_decay=False, noise_decay_amount=.99,
        Initialize a simple recurrent network.

        inputs_hook : Tuple of (shape, variable)
            Routing information for the model to accept inputs from elsewhere. This is used for linking
            different models together (e.g. setting the Softmax model's input layer to the DAE's hidden layer gives a
            newly supervised classification model). For now, it needs to include the shape information (normally the
            dimensionality of the input i.e. n_in).
        hiddens_hook : Tuple of (shape, variable)
            Routing information for the model to accept its hidden representation from elsewhere. For recurrent nets,
            this will be the initial starting value for hidden layers.
        params_hook : List(theano shared variable)
            A list of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters.
        outdir : str
            The location to produce outputs from training or running the :class:`RNN`. If None, nothing will be saved.
        input_size : int
            The size (dimensionality) of the input. If shape is provided in `inputs_hook`, this is optional.
        hidden_size : int
            The size (dimensionality) of the hidden layers. If shape is provided in `hiddens_hook`, this is optional.
        output_size : int
            The size (dimensionality) of the output.
        activation : str or callable
            The nonlinear (or linear) activation to perform after the dot product from hiddens -> output layer.
            This activation function should be appropriate for the output unit types, i.e. 'sigmoid' for binary.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        hidden_activation : str or callable
            The activation to perform for the hidden units.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        inner_hidden_activation : str or callable
            The activation to perform for the hidden gates.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        mrg : random
            A random number generator that is used when adding noise.
            I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.
        weights_init : str
            Determines the method for initializing model weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        r_weights_init : str
            Determines the method for initializing recurrent model weights. See opendeep.utils.nnet for options.
        r_weights_interval : str or float
            If Uniform `r_weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        r_weights_mean : float
            If Gaussian `r_weights_init`, the mean value to use.
        r_weights_std : float
            If Gaussian `r_weights_init`, the standard deviation to use.
        r_bias_init : float
            The initial value to use for the recurrent bias parameter. Most often, the default of 0.0 is preferred.
        cost_function : str or callable
            The function to use when calculating the output cost of the model.
            See opendeep.utils.cost for options. You can also specify your own function, which needs to be callable.
        cost_args : dict
            Any additional named keyword arguments to pass to the specified `cost_function`.
        noise : str
            What type of noise to use for the hidden layers and outputs. See opendeep.utils.noise
            for options. This should be appropriate for the unit activation, i.e. Gaussian for tanh or other
            real-valued activations, etc.
        noise_level : float
            The amount of noise to use for the noise function specified by `hidden_noise`. This could be the
            standard deviation for gaussian noise, the interval for uniform noise, the dropout amount, etc.
        noise_decay : str or False
            Whether to use `noise` scheduling (decay `noise_level` during the course of training),
            and if so, the string input specifies what type of decay to use. See opendeep.utils.decay for options.
            Noise decay (known as noise scheduling) effectively helps the model learn larger variance features first,
            and then smaller ones later (almost as a kind of curriculum learning). May help it converge faster.
        noise_decay_amount : float
            The amount to reduce the `noise_level` after each training epoch based on the decay function specified
            in `noise_decay`.
        direction : str
            The direction this recurrent model should go over its inputs.
            Can be 'forward', 'backward', or 'bidirectional'.
        clip_recurrent_grads : False or float, optional
            Whether to clip the gradients for the parameters that unroll over timesteps (such as the weights
            connecting previous hidden states to the current hidden state, and not the weights from current
            input to hiddens). If it is a float, the gradients for the weights will be hard clipped to the range
        # Snapshot the constructor arguments and hand them to the parent initializer.
        # NOTE(review): at this point locals() still contains `self`; unless it is removed first,
        # passing it as a keyword collides with the bound `self`. A line removing it may have been
        # lost from this listing - confirm.
        initial_parameters = locals().copy()
        super(LSTM, self).__init__(**initial_parameters)

        # specifications #
        # The two flags are mutually exclusive: each matches a different value of `direction`.
        backward = direction.lower() == 'backward'
        bidirectional = direction.lower() == 'bidirectional'

        # activation, cost, and noise functions #
        # recurrent hidden activation function!
        self.hidden_activation_func = get_activation_function(hidden_activation)
        self.inner_hidden_activation_func = get_activation_function(inner_hidden_activation)

        # output activation function!
        activation_func = get_activation_function(activation)

        # Cost function
        cost_function = get_cost_function(cost_function)
        cost_args = cost_args or dict()

        # Now deal with noise if we added it:
        if noise:
            log.debug('Adding %s noise switch.' % str(noise))
            if noise_level is not None:
                noise_level = sharedX(value=noise_level)
                noise_func = get_noise(noise, noise_level=noise_level, mrg=mrg)
                # NOTE(review): as listed, the assignment below replaces the one above; an `else:`
                # (the no-`noise_level` case) seems to be missing between them - confirm.
                noise_func = get_noise(noise, mrg=mrg)
            # apply the noise as a switch!
            # default to apply noise. this is for the cost and gradient functions to be computed later
            # (not sure if the above statement is accurate such that gradient depends on initial value of switch)
            self.noise_switch = sharedX(value=1, name="basiclayer_noise_switch")

            # noise scheduling
            # NOTE(review): the get_decay_function(...) call below is cut off after its first argument.
            if noise_decay and noise_level is not None:
                self.noise_schedule = get_decay_function(noise_decay,

        # inputs hook #
        # grab info from the inputs_hook
        # in the case of an inputs_hook, recurrent will always work with the leading tensor dimension
        # being the temporal dimension.
        # input is 3D tensor of (timesteps, batch_size, data_dim)
        # if input is 2D tensor, assume it is of the form (timesteps, data_dim) i.e. batch_size is 1. Convert to 3D.
        # if input is > 3D tensor, assume it is of form (timesteps, batch_size, data...) and flatten to 3D.
        if self.inputs_hook is not None:
            self.input = self.inputs_hook[1]

            if self.input.ndim == 1:
                self.input = T.unbroadcast(self.input.dimshuffle(0, 'x', 'x'), [1, 2])
                self.input_size = 1

            elif self.input.ndim == 2:
                self.input = T.unbroadcast(self.input.dimshuffle(0, 'x', 1), 1)

            elif self.input.ndim > 3:
                self.input = self.input.flatten(3)
                # NOTE(review): flatten(3) merges the trailing axes, so the merged size would be the
                # product of those dimensions rather than their sum - TODO confirm the intent.
                self.input_size = sum(self.input_size)
                # NOTE(review): as listed, this raise sits inside the `ndim > 3` branch; presumably it
                # belonged to a final `else:` that is missing here - confirm.
                raise NotImplementedError("Recurrent input with %d dimensions not supported!" % self.input.ndim)
            xs = self.input
            # NOTE(review): an `else:` (no inputs_hook) seems to be missing here; as listed, the lines
            # below overwrite the hooked input.
            # Assume input coming from optimizer is (batches, timesteps, data)
            # so, we need to reshape to (timesteps, batches, data)
            self.input = T.tensor3("Xs")
            xs = self.input.dimshuffle(1, 0, 2)

        # The target outputs for supervised training - in the form of (batches, timesteps, output) which is
        # the same dimension ordering as the expected input from optimizer.
        # therefore, we need to swap it like we did to input xs.
        self.target = T.tensor3("Ys")
        ys = self.target.dimshuffle(1, 0, 2)

        # hiddens hook #
        # set an initial value for the recurrent hiddens from hook
        if self.hiddens_hook is not None:
            h_init = self.hiddens_hook[1]
            self.hidden_size = self.hiddens_hook[0]
            # NOTE(review): an `else:` seems to be missing here; as listed, the hook's size is
            # immediately replaced by the `hidden_size` argument.
            # deal with h_init after parameters are made (have to make the same size as hiddens that are computed)
            self.hidden_size = hidden_size

        # for generating #
        # symbolic scalar for how many recurrent steps to use during generation from the model
        self.n_steps = T.iscalar("generate_n_steps")

        # parameters - make sure to deal with params_hook! #
        if self.params_hook is not None:
            # The hook is unpacked positionally: 14 entries here, 18 in the bidirectional layout below.
            if not bidirectional:
                (W_x_c, W_x_i, W_x_f, W_x_o,
                 U_h_c, U_h_i, U_h_f, U_h_o,
                 W_h_y, b_c, b_i, b_f, b_o,
                 b_y) = self.params_hook
                recurrent_params = [U_h_c, U_h_i, U_h_f, U_h_o]
                # NOTE(review): an `else:` (bidirectional layout) seems to be missing before the
                # second unpacking below.
                (W_x_c, W_x_i, W_x_f, W_x_o,
                 U_h_c, U_h_i, U_h_f, U_h_o,
                 U_h_c_b, U_h_i_b, U_h_f_b, U_h_o_b,
                 W_h_y, b_c, b_i, b_f, b_o,
                 b_y) = self.params_hook
                recurrent_params = [U_h_c, U_h_i, U_h_f, U_h_o, U_h_c_b, U_h_i_b, U_h_f_b, U_h_o_b]
        # NOTE(review): the `else:` for this branch and the opening of each get_weights/get_bias call
        # below are not visible in this listing; the comprehensions are cut off.
        # otherwise, construct our params
            # all input-to-hidden weights
            W_x_c, W_x_i, W_x_f, W_x_o = [
                            shape=(self.input_size, self.hidden_size),
                            name="W_x_%s" % sub,
                            # if gaussian
                            # if uniform
                for sub in ['c', 'i', 'f', 'o']
            # all hidden-to-hidden weights
            U_h_c, U_h_i, U_h_f, U_h_o = [
                            shape=(self.hidden_size, self.hidden_size),
                            name="U_h_%s" % sub,
                            # if gaussian
                            # if uniform
                for sub in ['c', 'i', 'f', 'o']
            # hidden-to-output weights
            W_h_y = get_weights(weights_init=weights_init,
                                shape=(self.hidden_size, self.output_size),
                                # if gaussian
                                # if uniform
            # biases
            b_c, b_i, b_f, b_o = [
                         name="b_%s" % sub,
                for sub in ['c', 'i', 'f', 'o']
            # output bias
            b_y = get_bias(shape=(self.output_size,),
            # clip gradients if we are doing that
            # recurrent_params keeps the unclipped variables (these are what end up in self.params);
            # the U_h_* names are rebound to the grad_clip-wrapped expressions that scan uses below.
            recurrent_params = [U_h_c, U_h_i, U_h_f, U_h_o]
            if clip_recurrent_grads:
                clip = abs(clip_recurrent_grads)
                U_h_c, U_h_i, U_h_f, U_h_o = [theano.gradient.grad_clip(p, -clip, clip) for p in recurrent_params]
            # bidirectional params
                # NOTE(review): at this indentation the bidirectional weights are only created when
                # `clip_recurrent_grads` is truthy; otherwise U_h_*_b are undefined when the reverse
                # scan below reads them. Looks like an indentation slip - confirm.
                if bidirectional:
                    # all hidden-to-hidden weights
                    U_h_c_b, U_h_i_b, U_h_f_b, U_h_o_b = [
                                    shape=(self.hidden_size, self.hidden_size),
                                    name="U_h_%s_b" % sub,
                                    # if gaussian
                                    # if uniform
                        for sub in ['c', 'i', 'f', 'o']
                    # The unclipped reverse weights are recorded before the names are rebound below.
                    recurrent_params += [U_h_c_b, U_h_i_b, U_h_f_b, U_h_o_b]
                    if clip_recurrent_grads:
                        clip = abs(clip_recurrent_grads)
                        U_h_c_b, U_h_i_b, U_h_f_b, U_h_o_b = [theano.gradient.grad_clip(p, -clip, clip) for p in
                                                              [U_h_c_b, U_h_i_b, U_h_f_b, U_h_o_b]]

        # put all the parameters into our list, and make sure it is in the same order as when we try to load
        # them from a params_hook!!!
        self.params = [W_x_c, W_x_i, W_x_f, W_x_o] + recurrent_params + [W_h_y, b_c, b_i, b_f, b_o, b_y]

        # make h_init the right sized tensor
        # Zeros shaped like one timestep's projection onto the hidden units.
        # NOTE(review): truthiness test here, whereas the hook check above uses `is not None`.
        if not self.hiddens_hook:
            h_init = T.zeros_like(T.dot(xs[0], W_x_c))

        c_init = T.zeros_like(T.dot(xs[0], W_x_c))

        # computation #
        # move some computation outside of scan to speed it up!
        x_c = T.dot(xs, W_x_c) + b_c
        x_i = T.dot(xs, W_x_i) + b_i
        x_f = T.dot(xs, W_x_f) + b_f
        x_o = T.dot(xs, W_x_o) + b_o

        # now do the recurrent stuff
        # NOTE(review): no `fn=` (step function) argument is visible in either scan call below, and
        # both calls are cut off before their closing parenthesis.
        (self.hiddens, _), self.updates = theano.scan(
            sequences=[x_c, x_i, x_f, x_o],
            outputs_info=[h_init, c_init],
            non_sequences=[U_h_c, U_h_i, U_h_f, U_h_o],

        # if bidirectional, do the same in reverse!
        if bidirectional:
            # `backward` is False whenever `bidirectional` is True, so go_backwards is True here.
            (hiddens_b, _), updates_b = theano.scan(
                sequences=[x_c, x_i, x_f, x_o],
                outputs_info=[h_init, c_init],
                non_sequences=[U_h_c_b, U_h_i_b, U_h_f_b, U_h_o_b],
                go_backwards=not backward,
            # flip the hiddens to be the right direction
            hiddens_b = hiddens_b[::-1]
            # update stuff
            # The two directions are combined by element-wise addition (not concatenation).
            self.hiddens += hiddens_b

        # add noise (like dropout) if we wanted it!
        # NOTE(review): the T.switch(...) call below is cut off after its first argument.
        if noise:
            self.hiddens = T.switch(self.noise_switch,

        # now compute the outputs from the leftover (top level) hiddens
        self.output = activation_func(
            T.dot(self.hiddens, W_h_y) + b_y

        # now to define the cost of the model - use the cost function to compare our output with the target value.
        self.cost = cost_function(output=self.output, target=ys, **cost_args)

        log.info("Initialized an LSTM!")
    # Build an RBM model graph: validate the single input and the hiddens specification, choose
    # sampling functions, obtain or create W / b_v / b_h, run k Gibbs steps with scan, and define
    # the free-energy cost.
    # NOTE(review): this listing looks truncated - the parameter list after `def __init__(` is
    # absent and the docstring text has no quote delimiters. The body reads `visible_activation`,
    # `hidden_activation`, `mrg`, `bias_init` and `k`; presumably they are constructor arguments.
    # Confirm against the upstream file.
    def __init__(
        RBM constructor. Defines the parameters of the model along with
        basic operations for inferring hidden from visible (and vice-versa),
        as well as for performing CD updates.

        inputs : List of [tuple(shape, `Theano.TensorType`)]
            The dimensionality of the inputs for this model, and the routing information for the model
            to accept inputs from elsewhere. `inputs` variable are expected to be of the form (timesteps, batch, data).
            `shape` will be a monad tuple representing known
            sizes for each dimension in the `Theano.TensorType`. The length of `shape` should be equal to number of
            dimensions in `Theano.TensorType`, where the shape element is an integer representing the size for its
            dimension, or None if the shape isn't known. For example, if you have a matrix with unknown batch size
            but fixed feature size of 784, `shape` would be: (None, 784). The full form of `inputs` would be:
            [((None, 784), <TensorType(float32, matrix)>)].
        hiddens : int or Tuple of (shape, `Theano.TensorType`)
            Int for the number of hidden units to use, or a tuple of shape, expression to route the starting
            hidden values from elsewhere.
        params : Dict(string_name: theano SharedVariable), optional
            A dictionary of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as siamese networks or pretraining some
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        visible_activation : str or callable
            The nonlinear (or linear) visible activation to perform after the dot product from hiddens -> visible layer.
            This activation function should be appropriate for the input unit types, i.e. 'sigmoid' for binary inputs.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        hidden_activation : str or callable
            The nonlinear (or linear) hidden activation to perform after the dot product from visible -> hiddens layer.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        weights_init : str
            Determines the method for initializing model weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        mrg : random
            A random number generator that is used when sampling. The RBM is a probabilistic model, so it relies a lot
            on sampling. I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.
        k : int
            The k number of steps used for CD-k or PCD-k with Gibbs sampling. Basically, the number of samples
            generated from the model to train against reconstructing the original input.
        # init Model to combine the defaults and config dictionaries with the initial parameters.
        # NOTE(review): at this point locals() still contains `self`; unless it is removed first,
        # passing it as a keyword collides with the bound `self`. A line removing it may have been
        # lost from this listing - confirm.
        initial_parameters = locals().copy()
        super(RBM, self).__init__(**initial_parameters)

        # specifications #
        # presumably self.inputs, self.hiddens and self.params are populated by the parent
        # initializer - they are read below but not assigned in the visible code.
        # NOTE(review): the raise below is cut off before its closing parenthesis.
        if len(self.inputs) > 1:
            raise NotImplementedError(
                "Expected 1 input to RBM, found %d. Please merge inputs before passing "
                "to the model!" % len(self.inputs)
        # self.inputs is a list of all the input expressions (we enforce only 1, so self.inputs[0] is the input)
        input_shape, self.input = self.inputs[0]
        # An int shape is expanded to a tuple with None for every leading axis of the input.
        if isinstance(input_shape, int):
            self.input_size = ((None,) * (self.input.ndim - 1)) + (input_shape,)
            # NOTE(review): an `else:` seems to be missing here; as listed, the line below replaces
            # the tuple built just above.
            self.input_size = input_shape
        assert self.input_size is not None, "Need to specify the shape for the last dimension of the input!"

        # our output space is the same as the input space
        self.output_size = self.input_size

        # grab hiddens
        # have only 1 hiddens
        assert len(self.hiddens) == 1, "Expected 1 `hiddens` param, found %d" % len(self.hiddens)
        self.hiddens = self.hiddens[0]
        # An int gives only the hidden size; a (shape, expression) tuple also gives starting hiddens.
        if isinstance(self.hiddens, int):
            hidden_size = self.hiddens
            hiddens_init = None
        elif isinstance(self.hiddens, tuple):
            hidden_shape, hiddens_init = self.hiddens
            if isinstance(hidden_shape, int):
                hidden_size = hidden_shape
                # NOTE(review): an `else:` seems to be missing before the next line, and another
                # before the raise that follows (which is also cut off) - confirm.
                hidden_size = hidden_shape[-1]
            raise AssertionError(
                "Hiddens need to be an int or tuple of (shape, theano_expression), found %s" % type(self.hiddens)

        # other specifications
        # visible activation function!
        self.visible_activation_func = get_activation_function(visible_activation)

        # make sure the sampling functions are appropriate for the activation functions.
        # NOTE(review): in both checks below, the log.error/raise pair presumably belonged to an
        # `else:` that is missing; as listed it would run even for a binary activation.
        if is_binary(self.visible_activation_func):
            self.visible_sampling = mrg.binomial
            # TODO: implement non-binary activation
            log.error("Non-binary visible activation not supported yet!")
            raise NotImplementedError("Non-binary visible activation not supported yet!")

        # hidden activation function!
        self.hidden_activation_func = get_activation_function(hidden_activation)

        # make sure the sampling functions are appropriate for the activation functions.
        if is_binary(self.hidden_activation_func):
            self.hidden_sampling = mrg.binomial
            # TODO: implement non-binary activation
            log.error("Non-binary hidden activation not supported yet!")
            raise NotImplementedError("Non-binary hidden activation not supported yet!")

        # parameters - make sure to deal with params_hook! #
        # Each parameter is taken from self.params when its key is present, otherwise the default is used.
        # NOTE(review): `.get` evaluates its default argument eagerly, so get_bias(...) runs even when
        # the key exists (assumes self.params is a dict - confirm). The first call below is cut off.
        self.W = self.params.get(
                shape=(self.input_size[-1], hidden_size),
                # if gaussian
                # if uniform
        self.b_v = self.params.get("b_v", get_bias(shape=self.input_size[-1], name="b_v", init_values=bias_init))
        self.b_h = self.params.get("b_h", get_bias(shape=hidden_size, name="b_h", init_values=bias_init))

        # Finally have the parameters
        self.params = {"W": self.W, "b_v": self.b_v, "b_h": self.b_h}

        # computation #
        # Run k Gibbs steps; only the non-None entry of outputs_info is fed back between steps.
        # NOTE(review): both scan calls are cut off, and an `else:` seems to be missing before the
        # "initialize from hiddens" call.
        # initialize from visibles if we aren't generating from some hiddens
        if hiddens_init is None:
            [_, v_chain, _, h_chain], self.updates = theano.scan(
                fn=self._gibbs_step_vhv, outputs_info=[None, self.input, None, None], n_steps=k
        # initialize from hiddens
            [_, v_chain, _, h_chain], self.updates = theano.scan(
                fn=self._gibbs_step_hvh, outputs_info=[None, None, None, hiddens_init], n_steps=k

        # Keep the samples from the last Gibbs step.
        self.v_sample = v_chain[-1]
        self.h_sample = h_chain[-1]

        # `mean_v` is not used again in the visible part of this method.
        mean_v, _, _, _ = self._gibbs_step_vhv(self.v_sample)

        # the free-energy cost function!
        # consider v_sample constant when computing gradients on the cost function
        # this actually keeps v_sample from being considered in the gradient, to set gradient to 0 instead,
        # use theano.gradient.zero_grad
        v_sample_constant = theano.gradient.disconnected_grad(self.v_sample)
        # v_sample_constant = v_sample
        # Free-energy difference between the input and the sample, divided by the input's leading-axis size.
        self.cost = (self.free_energy(self.input) - self.free_energy(v_sample_constant)) / self.input.shape[0]

        log.debug("Initialized an RBM shape %s", str((self.input_size, hidden_size)))