def __init__(
            self,
            rng,
            input,
            n_in,
            n_out,
            W=None,
            b=None,
            gamma=None,
            beta=None,
            activation_function=T.tanh
    ):
        """
        Typical hidden layer of a MLP: units are fully-connected and have
        sigmoidal activation function. Weight matrix W is of shape (n_in,n_out)
        and the bias vector b is of shape (n_out,).

        NOTE : The nonlinearity used here is tanh

        Hidden unit activation is given by: tanh(dot(input,W) + b)

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dmatrix
        :param input: a symbolic tensor of shape (n_examples, n_in)

        :type n_in: int
        :param n_in: dimensionality of input

        :type n_out: int
        :param n_out: number of hidden units

        :type W: numpy.ndarray or theano shared variable, optional
        :param W: initial weight values; sampled from a scaled uniform
                  distribution if None

        :type b: numpy.ndarray or theano shared variable, optional
        :param b: initial bias values; initialized to zeros if None

        :type gamma: numpy.ndarray or theano shared variable, optional
        :param gamma: batch-normalization scale; initialized to ones if None

        :type beta: numpy.ndarray or theano shared variable, optional
        :param beta: batch-normalization shift; initialized to zeros if None

        :type activation_function: theano.Op or function
        :param activation_function: non-linearity to be applied in the
                                    hidden layer (None for a linear layer)
        """
        self.input = input

        # Note : optimal initialization of weights is dependent on the
        #        activation function used (among other things).
        #        For example, results presented in [Xavier10] suggest that you
        #        should use initial weights 4 times larger for sigmoid than for
        #        tanh. We have no such information for other functions, so we
        #        use the same scheme as for tanh.
        W_values = W
        if W is None:
            W_values = numpy.asarray(
                rng.uniform(
                    low=-numpy.sqrt(6. / (n_in + n_out)),
                    high=numpy.sqrt(6. / (n_in + n_out)),
                    size=(n_in, n_out)
                ),
                dtype=theano.config.floatX
            )
            if activation_function == theano.tensor.nnet.sigmoid:
                W_values *= 4
        if isinstance(W_values, numpy.ndarray):
            W_values = theano.shared(value=W_values, name='W', borrow=True)
        self.W = W_values

        b_values = b
        if b is None:
            b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
        if isinstance(b_values, numpy.ndarray):
            b_values = theano.shared(value=b_values, name='b', borrow=True)
        self.b = b_values

        gamma_val = gamma
        if gamma is None:
            gamma_val = numpy.ones((n_out,), dtype=theano.config.floatX)
        if isinstance(gamma_val, numpy.ndarray):
            gamma_val = theano.shared(value=gamma_val, name='gamma', borrow=True)
        self.gamma = gamma_val

        beta_val = beta
        if beta is None:
            beta_val = numpy.zeros((n_out,), dtype=theano.config.floatX)
        if isinstance(beta_val, numpy.ndarray):
            beta_val = theano.shared(value=beta_val, name='beta', borrow=True)
        self.beta = beta_val

        # linear output
        lin_output = T.dot(input, self.W) + self.b

        bn_output = batch_normalization(
            inputs=lin_output,
            gamma=self.gamma,
            beta=self.beta,
            mean=lin_output.mean((0,), keepdims=True),
            std=lin_output.std((0,), keepdims=True),
            mode='high_mem'
        )

        if activation_function is None:
            self.output = bn_output
        elif activation_function == T.nnet.relu:
            # clipped ReLU: rectify, then cap activations at 20
            self.output = T.clip(bn_output, 0, 20)
        else:
            self.output = activation_function(bn_output)

        # parameters of the model
        self.params = [self.W, self.b, self.gamma, self.beta]
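    # ------------------------------------------------------------------
    # Usage sketch (illustrative only): assumes this __init__ belongs to a
    # HiddenLayer-style class and that numpy, theano, theano.tensor as T and
    # batch_normalization (from theano.tensor.nnet.bn) are already imported.
    #
    #   rng = numpy.random.RandomState(1234)
    #   x = T.matrix('x')                      # (n_examples, n_in) minibatch
    #   layer = HiddenLayer(rng, input=x, n_in=784, n_out=500,
    #                       activation_function=T.tanh)
    #   # layer.output is the batch-normalized, tanh-activated result;
    #   # layer.params collects W, b, gamma and beta for gradient updates.
    # ------------------------------------------------------------------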
    def __init__(self,
                 rng,
                 is_train,
                 input_data,
                 filter_shape,
                 image_shape,
                 ssample=(1, 1),
                 bordermode='valid',
                 p=0.5,
                 alpha=0.0):
        """
        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dtensor4
        :param input: symbolic image tensor, of shape image_shape

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height, filter width)

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :type poolsize: tuple or list of length 2
        :param poolsize: the downsampling (pooling) factor (#rows, #cols)
        """

        assert image_shape[1] == filter_shape[1]

        # there are "num input feature maps * filter height * filter width"
        # inputs to each hidden unit
        fan_in = numpy.prod(filter_shape[1:])
        # each unit in the lower layer receives a gradient from:
        # "num output feature maps * filter height * filter width" /
        #   subsampling (stride) factor
        fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) //
                   numpy.prod(ssample))

        # initialize weights with random weights
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        self.W = theano.shared(numpy.asarray(rng.uniform(low=-W_bound,
                                                         high=W_bound,
                                                         size=filter_shape),
                                             dtype=theano.config.floatX),
                               borrow=True)

        # the bias is a 1D tensor -- one bias per output feature map
        b_values = numpy.zeros((filter_shape[0], ), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)

        gamma_values = numpy.ones((filter_shape[0], ),
                                  dtype=theano.config.floatX)
        self.gamma = theano.shared(value=gamma_values, borrow=True)

        beta_values = numpy.zeros((filter_shape[0], ),
                                  dtype=theano.config.floatX)
        self.beta = theano.shared(value=beta_values, borrow=True)

        # convolve input feature maps with filters
        conv_out = conv2d(input=input_data,
                          filters=self.W,
                          filter_shape=filter_shape,
                          input_shape=image_shape,
                          subsample=ssample,
                          border_mode=bordermode)

        # add the bias term. Since the bias is a vector (1D array), we first
        # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
        # thus be broadcasted across mini-batches and feature map
        # width & height

        lin_output = conv_out + self.b.dimshuffle('x', 0, 'x', 'x')

        bn_output = batch_normalization(
            inputs=lin_output,
            gamma=self.gamma.dimshuffle('x', 0, 'x', 'x'),
            beta=self.beta.dimshuffle('x', 0, 'x', 'x'),
            mean=lin_output.mean((0, ), keepdims=True),
            std=lin_output.std((0, ), keepdims=True),
            mode='low_mem')

        activated_output = T.nnet.relu(bn_output, alpha=alpha)

        # apply dropout to the activations
        dropped_output = drop(activated_output, p)

        # use the dropped activations during training (is_train != 0);
        # at test time, scale the full activations by p instead
        self.output = T.switch(T.neq(is_train, 0), dropped_output,
                               p * activated_output)

        # store parameters of this layer
        self.params = [self.W, self.b, self.gamma, self.beta]

        # keep track of model input
        self.input = input_data
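    # ------------------------------------------------------------------
    # Usage sketch (illustrative only): assumes this __init__ belongs to a
    # convolutional-layer class (called ConvBNLayer here) and that the drop()
    # dropout helper used above is defined elsewhere in the module.
    #
    #   rng = numpy.random.RandomState(1234)
    #   is_train = T.iscalar('is_train')       # 1 during training, 0 at test
    #   x = T.tensor4('x')                     # (batch, channels, height, width)
    #   layer = ConvBNLayer(rng, is_train, input_data=x,
    #                       filter_shape=(32, 3, 5, 5),
    #                       image_shape=(128, 3, 32, 32),
    #                       ssample=(1, 1), bordermode='valid', p=0.8)
    #   # layer.output: batch-normalized, ReLU-activated feature maps, with
    #   # dropout applied whenever is_train is non-zero.
    # ------------------------------------------------------------------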
    def __init__(self, input, n_in, n_out, W=None, b=None, gamma=None, beta=None):
        """ Initialize the parameters of the linear regression

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
                      architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
                     which the datapoints lie

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
                      which the labels lie

        """

        self.n_in = n_in
        self.n_out = n_out
        # initialize the weights W as a matrix of shape (n_in, n_out),
        # sampled from a scaled uniform distribution when no W is given
        W_values = W
        if W is None:
            W_values = numpy.asarray(
                numpy.random.uniform(
                    low=-numpy.sqrt(6. / (n_in + n_out)),
                    high=numpy.sqrt(6. / (n_in + n_out)),
                    size=(n_in, n_out)
                ),
                dtype=theano.config.floatX
            )
        self.W = theano.shared(value=W_values, name='W', borrow=True)

        b_values = b
        if b is None:
            b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, name='b', borrow=True)

        gamma_val = gamma
        if gamma is None:
            gamma_val = numpy.ones((n_out,), dtype=theano.config.floatX)
        self.gamma = theano.shared(value=gamma_val, name='gamma', borrow=True)

        beta_val = beta
        if beta is None:
            beta_val = numpy.zeros((n_out,), dtype=theano.config.floatX)
        self.beta = theano.shared(value=beta_val, name='beta', borrow=True)

        # keep track of model input
        self.input = input

        # Linear regression.
        linear = T.dot(self.input, self.W) + self.b
        bn_output = batch_normalization(
            inputs=linear,
            gamma=self.gamma,
            beta=self.beta,
            mean=linear.mean((0,), keepdims=True),
            std=linear.std((0,), keepdims=True),
            mode='high_mem'
        )

        # Output of the model
        self.output = bn_output

        # parameters of the model
        self.params = [self.W, self.b, self.gamma, self.beta]
        # L1 and (squared) L2 norms of the weights, for use as regularizers
        self.L1 = T.sum(abs(self.W))
        self.L2 = T.sum(self.W ** 2)
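    # ------------------------------------------------------------------
    # Usage sketch (illustrative only): assumes this __init__ belongs to a
    # LinearRegression-style class; y is a hypothetical target matrix.
    #
    #   x = T.matrix('x')
    #   y = T.matrix('y')
    #   model = LinearRegression(input=x, n_in=784, n_out=10)
    #   mse = T.mean((model.output - y) ** 2)
    #   # combine the data term with the layer's L1/L2 penalties
    #   cost = mse + 1e-4 * model.L1 + 1e-4 * model.L2
    #   grads = [T.grad(cost, param) for param in model.params]
    # ------------------------------------------------------------------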