Example #1
    def __init__(self,
                 input,
                 num_neurons,
                 input_shape,
                 id,
                 rng=None,
                 input_params=None,
                 borrow=True,
                 activation='relu',
                 batch_norm=True,
                 verbose=2):
        super(dot_product_layer, self).__init__(id=id,
                                                type='dot_product',
                                                verbose=verbose)
        if verbose >= 3:
            print "... Creating dot product layer"

        if rng is None:
            rng = numpy.random

        create = False
        if input_params is None:
            create = True
        elif input_params[0] is None:
            create = True
        if create is True:
            w_values = numpy.asarray(
                0.01 * rng.standard_normal(size=(input_shape[1], num_neurons)),
                dtype=theano.config.floatX)
            if activation == 'sigmoid':
                w_values *= 4
            self.w = theano.shared(value=w_values, name='weights')
        else:
            self.w = input_params[0]

        create = False
        if input_params is None:
            create = True
        elif input_params[1] is None:
            create = True
        if create is True:
            b_values = numpy.zeros((num_neurons, ), dtype=theano.config.floatX)
            self.b = theano.shared(value=b_values, name='bias')
        else:
            self.b = input_params[1]

        if batch_norm is True:
            create = False
            if input_params is None:
                create = True
            elif input_params[2] is None:
                create = True
            if create is True:
                alpha_values = numpy.ones((num_neurons, ),
                                          dtype=theano.config.floatX)
                self.alpha = theano.shared(value=alpha_values,
                                           name='batchnorm')
            else:
                self.alpha = input_params[2]

        dot_product = T.dot(input, self.w)

        if batch_norm is True:
            std = dot_product.std(0)
            mean = dot_product.mean(0)
            std += 0.001  # fudge factor to avoid division by zero

            dot_product = dot_product - mean
            dot_product = dot_product * (self.alpha / std)

        dot_product = dot_product + self.b
        dot_product_shp = (input_shape[0], num_neurons)
        self.output, self.output_shape = _activate(x=dot_product,
                                                   activation=activation,
                                                   input_size=dot_product_shp,
                                                   verbose=verbose,
                                                   dimension=1)

        # parameters of the model
        if batch_norm is True:
            self.params = [self.w, self.b, self.alpha]
        else:
            self.params = [self.w, self.b]

        self.L1 = abs(self.w).sum()
        if batch_norm is True: self.L1 = self.L1 + abs(self.alpha).sum()
        self.L2 = (self.w**2).sum()
        if batch_norm is True: self.L2 = self.L2 + (self.alpha**2).sum()
        """
        Ioffe, Sergey, and Christian Szegedy. "Batch normalization: Accelerating deep network
        training by reducing internal covariate shift." arXiv preprint arXiv:1502.03167 (2015). """

        if verbose >= 3:
            print "... Dot Product layer is created with output shape " + str(
                self.output_shape)

        self.num_neurons = num_neurons
        self.activation = activation
        self.batch_norm = batch_norm
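A minimal usage sketch for the dot_product_layer above, assuming the class and its _activate helper are importable from the surrounding module; the import path, layer id, and shapes below are placeholders, not part of the original source.

import numpy
import theano
import theano.tensor as T
# from my_layers import dot_product_layer   # placeholder import path

x = T.matrix('x')                           # symbolic mini-batch of inputs
fc = dot_product_layer(input=x,
                       num_neurons=256,
                       input_shape=(64, 784),   # (mini_batch_size, input_dim)
                       id='fc1',
                       activation='relu',
                       batch_norm=True)

# Compile the forward pass and run it on random data.
forward = theano.function([x], fc.output)
out = forward(numpy.random.rand(64, 784).astype(theano.config.floatX))
print out.shape                             # (64, 256), matching fc.output_shape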
Example #2
    def __init__(
        self,
        input,
        nkerns,
        input_shape,
        id,
        filter_shape=(3, 3),
        poolsize=(2, 2),
        pooltype='max',
        batch_norm=False,
        border_mode='valid',
        stride=(1, 1),
        rng=None,
        borrow=True,
        activation='relu',
        input_params=None,
        verbose=2,
    ):

        super(conv_pool_layer_2d, self).__init__(id=id,
                                                 type='conv_pool',
                                                 verbose=verbose)
        if verbose >= 3:
            print "... Creating conv pool layer"

        if rng is None:
            rng = numpy.random

        # To copy previously created weights or apply custom initializations
        if input_params is not None:
            init_w = input_params[0]
            init_b = input_params[1]
            if batch_norm is True:
                init_gamma = input_params[2]
                init_beta = input_params[3]
                init_mean = input_params[4]
                init_var = input_params[5]

        mini_batch_size = input_shape[0]
        channels = input_shape[1]
        width = input_shape[3]
        height = input_shape[2]
        # srng = RandomStreams(rng.randint(1,2147462579))
        # Initialize the parameters of this layer.
        w_shp = (nkerns, channels, filter_shape[0], filter_shape[1])

        if input_params is None:
            # fan_in = filter_shape[0]*filter_shape[1]
            # fan_out = filter_shape[0]*filter_shape[1] / numpy.prod(poolsize)
            # w_bound = numpy.sqrt(6. / (fan_in + fan_out))
            self.w = theano.shared(
                value=
                # numpy.asarray(rng.uniform(low=-w_bound, high=w_bound, size =w_shp),
                numpy.asarray(0.01 * rng.standard_normal(size=w_shp),
                              dtype=theano.config.floatX),
                borrow=borrow,
                name='filterbank')
            self.b = theano.shared(value=numpy.zeros(
                (nkerns, ), dtype=theano.config.floatX),
                                   name='bias',
                                   borrow=borrow)
            if batch_norm is True:
                self.gamma = theano.shared(value=numpy.ones(
                    (nkerns, ), dtype=theano.config.floatX),
                                           name='gamma',
                                           borrow=borrow)
                self.beta = theano.shared(value=numpy.zeros(
                    (nkerns, ), dtype=theano.config.floatX),
                                          name='beta',
                                          borrow=borrow)
                self.running_mean = theano.shared(value=numpy.zeros(
                    (nkerns, ), dtype=theano.config.floatX),
                                                  name='population_mean',
                                                  borrow=borrow)
                self.running_var = theano.shared(value=numpy.ones(
                    (nkerns, ), dtype=theano.config.floatX),
                                                 name='population_var',
                                                 borrow=borrow)
        else:
            self.w = init_w
            self.b = init_b
            if batch_norm is True:
                self.gamma = init_gamma
                self.beta = init_beta
                self.running_mean = init_mean
                self.running_var = init_var

        # Perform the convolution part
        convolver = convolver_2d(input=input,
                                 filters=self.w,
                                 subsample=stride,
                                 filter_shape=w_shp,
                                 image_shape=input_shape,
                                 border_mode=border_mode,
                                 verbose=verbose)

        conv_out = convolver.out
        conv_out_shp = (mini_batch_size, nkerns, convolver.out_shp[0],
                        convolver.out_shp[1])

        self.conv_out = conv_out
        if not poolsize == (1, 1):
            pooler = pooler_2d(input=conv_out,
                               img_shp=conv_out_shp,
                               mode=pooltype,
                               ds=poolsize,
                               verbose=verbose)
            pool_out = pooler.out
            pool_out_shp = pooler.out_shp
        else:
            pool_out = conv_out
            pool_out_shp = conv_out_shp
        """
        Ioffe, Sergey, and Christian Szegedy. "Batch normalization: Accelerating deep network
        training by reducing internal covariate shift." arXiv preprint arXiv:1502.03167 (2015). """
        if batch_norm is True:
            batch_norm_out,_,_,mean,var = batch_normalization_train(
                                                  inputs = pool_out + \
                                                                self.b.dimshuffle('x', 0, 'x', 'x'),
                                                  gamma = self.gamma,
                                                  beta = self.beta,
                                                  axes ='spatial',
                                                  running_mean = self.running_mean,
                                                  running_var = self.running_var )

            mean = theano.tensor.unbroadcast(mean, 0)
            var = theano.tensor.unbroadcast(var, 0)
            self.updates[self.running_mean] = mean
            self.updates[self.running_var] = var + 0.001

            batch_norm_inference = batch_normalization_test (
                                                    inputs = pool_out + \
                                                            self.b.dimshuffle('x', 0, 'x', 'x'),
                                                    gamma = self.gamma,
                                                    beta = self.beta,
                                                    axes = 'spatial',
                                                    mean = self.running_mean,
                                                    var = self.running_var )
        else:
            batch_norm_out = pool_out + self.b.dimshuffle('x', 0, 'x', 'x')
            batch_norm_inference = batch_norm_out

        batch_norm_out_shp = pool_out_shp
        self.output, self.output_shape = _activate(
            x=batch_norm_out,
            activation=activation,
            input_size=batch_norm_out_shp,
            verbose=verbose,
            dimension=2)

        self.inference, _ = _activate(x=batch_norm_inference,
                                      activation=activation,
                                      input_size=batch_norm_out_shp,
                                      verbose=verbose,
                                      dimension=2)
        # store parameters of this layer and do some bookkeeping.
        self.params = [self.w, self.b]
        self.active_params = [self.w, self.b]
        if batch_norm is True:
            self.params.append(self.gamma)
            self.params.append(self.beta)
            self.active_params.append(self.gamma)
            self.active_params.append(self.beta)
            self.params.append(self.running_mean)  # inactive params
            self.params.append(self.running_var)  # inactive params

        self.L1 = abs(self.w).sum()
        # if batch_norm is True : self.L1 = self.L1 # + abs(self.gamma).sum()
        self.L2 = (self.w**2).sum()
        # if batch_norm is True: self.L2 = self.L2 # + (self.gamma**2).sum()

        # Stored here for the print_layer method to use.
        self.nkerns = nkerns
        self.filter_shape = filter_shape
        self.poolsize = poolsize
        self.stride = stride
        self.input_shape = input_shape
        self.num_neurons = nkerns
        self.activation = activation
        self.batch_norm = batch_norm
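A sketch, not taken from the original source, of how this conv_pool_layer_2d variant might be driven. It assumes the base layer class provides the self.updates dictionary that the batch-norm branch above writes into; the import path and shapes are placeholders.

import numpy
import theano
import theano.tensor as T
# from my_layers import conv_pool_layer_2d   # placeholder import path

images = T.tensor4('images')                 # (batch, channels, height, width)
conv1 = conv_pool_layer_2d(input=images,
                           nkerns=20,
                           input_shape=(64, 1, 28, 28),
                           id='conv1',
                           filter_shape=(5, 5),
                           poolsize=(2, 2),
                           batch_norm=True)

# Training graph: pass the layer's updates so running_mean / running_var are
# refreshed on every call. Inference graph: uses the stored population statistics.
train_fwd = theano.function([images], conv1.output, updates=conv1.updates)
test_fwd = theano.function([images], conv1.inference)

batch = numpy.random.rand(64, 1, 28, 28).astype(theano.config.floatX)
print train_fwd(batch).shape   # (64, 20, 12, 12): 5x5 valid conv, then 2x2 pooling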
Example #3
    def __init__(self,
                 input,
                 input_shape,
                 id,
                 num_classes=10,
                 rng=None,
                 input_params=None,
                 borrow=True,
                 activation='softmax',
                 verbose=2):

        super(classifier_layer, self).__init__(id=id,
                                               type='classifier',
                                               verbose=verbose)

        if rng is None:
            rng = numpy.random

        if verbose >= 3:
            print "... Creating classifier layer"
        # Initialize the weights w as a matrix of shape (n_in, n_out).
        self.input = input
        # To copy previously created weights or apply custom initializations
        if input_params is not None:
            self.w = input_params[0]
            self.b = input_params[1]
        else:
            self.w = theano.shared(value=numpy.asarray(
                0.01 * rng.standard_normal(size=(input_shape[1], num_classes)),
                dtype=theano.config.floatX),
                                   name='weights',
                                   borrow=borrow)
            self.b = theano.shared(value=numpy.zeros(
                (num_classes, ), dtype=theano.config.floatX),
                                   name='bias',
                                   borrow=borrow)

        self.fit = T.dot(input, self.w) + self.b
        self.p_y_given_x, softmax_shp = _activate(x=self.fit,
                                                  activation=activation,
                                                  input_size=num_classes,
                                                  verbose=verbose,
                                                  dimension=2)

        # compute prediction as class whose probability is maximal in symbolic form
        self.predictions = T.argmax(self.p_y_given_x, axis=1)

        # parameters of the model
        self.L1 = abs(self.w).sum()
        self.L2 = (self.w**2).sum()
        self.params = [self.w, self.b]
        self.probabilities = T.log(self.p_y_given_x)
        self.output = self.p_y_given_x
        self.output_shape = (input_shape[0], num_classes)
        self.num_neurons = num_classes
        self.activation = activation
        self.dropout_rate = 0
        self.batch_norm = False

        if verbose >= 3:
            print "... Classifier layer is created with output shape " + str(
                self.output_shape)
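A usage sketch for the classifier_layer above (placeholder import path). The negative log-likelihood and error expressions are added here only to show how self.probabilities and self.predictions are typically consumed; they are not part of the class itself.

import theano
import theano.tensor as T
# from my_layers import classifier_layer     # placeholder import path

features = T.matrix('features')
labels = T.ivector('labels')

clf = classifier_layer(input=features,
                       input_shape=(64, 256),   # (mini_batch_size, feature_dim)
                       id='softmax',
                       num_classes=10)

# Mean negative log-likelihood over the mini-batch, built from the stored
# log-probabilities, plus the mis-classification rate.
nll = -T.mean(clf.probabilities[T.arange(labels.shape[0]), labels])
errors = T.mean(T.neq(clf.predictions, labels))

evaluate = theano.function([features, labels], [nll, errors])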
Example #4
    def __init__(
        self,
        input,
        nkerns,
        input_shape,
        id,
        output_shape,
        filter_shape=(3, 3),
        poolsize=(1, 1),
        pooltype='max',
        batch_norm=False,
        border_mode='valid',
        stride=(1, 1),
        rng=None,
        borrow=True,
        activation='relu',
        input_params=None,
        verbose=2,
    ):

        super(deconv_layer_2d, self).__init__(id=id,
                                              type='deconv',
                                              verbose=verbose)
        if verbose >= 3:
            print "... Creating deconv layer"

        if rng is None:
            rng = numpy.random

        create_w = False
        create_b = False
        create_bn = False

        # To copy previously created weights or apply custom initializations
        if not input_params is None:
            if input_params[0] is None:
                create_w = True
            if input_params[1] is None:
                create_b = True
            if batch_norm is True:
                if input_params[2] is None:
                    create_bn = True
        else:
            create_w = True
            create_b = True
            create_bn = True

        mini_batch_size = input_shape[0]
        channels = input_shape[1]
        width = input_shape[3]
        height = input_shape[2]
        # srng = RandomStreams(rng.randint(1,2147462579))
        # Initialize the parameters of this layer.

        w_shp = (nkerns, output_shape[2], filter_shape[0], filter_shape[1])
        o_shp = (input_shape[0], output_shape[2], output_shape[0],
                 output_shape[1])

        if create_w is True:
            self.w = theano.shared(value=numpy.asarray(
                0.01 * rng.standard_normal(size=w_shp),
                dtype=theano.config.floatX),
                                   borrow=borrow,
                                   name='filterbank')
        else:
            self.w = input_params[0]

        if create_b is True:
            self.b = theano.shared(value=numpy.zeros(
                (output_shape[2], ), dtype=theano.config.floatX),
                                   name='bias',
                                   borrow=borrow)
        else:
            self.b = input_params[1]

        if batch_norm is True:
            if create_bn is True:
                self.gamma = theano.shared(value=numpy.ones(
                    (output_shape[2], ), dtype=theano.config.floatX),
                                           name='gamma',
                                           borrow=borrow)
                self.beta = theano.shared(value=numpy.zeros(
                    (output_shape[2], ), dtype=theano.config.floatX),
                                          name='beta',
                                          borrow=borrow)
                self.running_mean = theano.shared(value=numpy.zeros(
                    (output_shape[2], ), dtype=theano.config.floatX),
                                                  name='population_mean',
                                                  borrow=borrow)
                self.running_var = theano.shared(value=numpy.ones(
                    (output_shape[2], ), dtype=theano.config.floatX),
                                                 name='population_var',
                                                 borrow=borrow)
            else:
                self.gamma = input_params[2]
                self.beta = input_params[3]
                self.running_mean = input_params[4]
                self.running_var = input_params[5]

        # Perform the convolution part
        convolver = deconvolver_2d(input=input,
                                   filters=self.w,
                                   output_shape=o_shp,
                                   subsample=stride,
                                   filter_shape=w_shp,
                                   image_shape=input_shape,
                                   border_mode=border_mode,
                                   verbose=verbose)

        conv_out = convolver.out
        conv_out_shp = o_shp

        self.conv_out = conv_out
        if not poolsize == (1, 1):
            raise Exception(
                "Unpool operation not yet supported by the deconv layer")
            """ #pragma: no cover
             pooler = pooler_2d(
                                input = conv_out,
                                img_shp = conv_out_shp,
                                mode = pooltype,
                                ds = poolsize,
                                verbose = verbose
                            )
             pool_out = pooler.out
             pool_out_shp = pooler.out_shp
             """
        else:
            unpool_out = conv_out
            unpool_out_shp = conv_out_shp
        """
        Ioffe, Sergey, and Christian Szegedy. "Batch normalization: Accelerating deep network
        training by reducing internal covariate shift." arXiv preprint arXiv:1502.03167 (2015). """
        if batch_norm is True:
            batch_norm_out,_,_,mean,var = batch_normalization_train(
                                                  inputs = unpool_out + \
                                                                self.b.dimshuffle('x', 0, 'x', 'x'),
                                                  gamma = self.gamma,
                                                  beta = self.beta,
                                                  axes ='spatial',
                                                  running_mean = self.running_mean,
                                                  running_var = self.running_var )

            mean = theano.tensor.unbroadcast(mean, 0)
            var = theano.tensor.unbroadcast(var, 0)
            var = var + 0.000001
            self.updates[self.running_mean] = mean
            self.updates[self.running_var] = var

            batch_norm_inference = batch_normalization_test (
                                                    inputs = unpool_out + \
                                                            self.b.dimshuffle('x', 0, 'x', 'x'),
                                                    gamma = self.gamma,
                                                    beta = self.beta,
                                                    axes = 'spatial',
                                                    mean = self.running_mean,
                                                    var = self.running_var )
        else:
            batch_norm_out = unpool_out + self.b.dimshuffle('x', 0, 'x', 'x')
            batch_norm_inference = batch_norm_out

        batch_norm_out_shp = unpool_out_shp
        if type(activation) is tuple:
            if activation[0] == 'maxout':
                raise Exception(
                    'Deconvolution layer does not support maxout activation')
        self.output, self.output_shape = _activate(
            x=batch_norm_out,
            activation=activation,
            input_size=batch_norm_out_shp,
            verbose=verbose,
            dimension=2)

        self.inference, _ = _activate(x=batch_norm_inference,
                                      activation=activation,
                                      input_size=batch_norm_out_shp,
                                      verbose=verbose,
                                      dimension=2)
        # store parameters of this layer and do some bookkeeping.
        self.params = [self.w, self.b]
        self.active_params = [self.w, self.b]
        if batch_norm is True:
            self.params.append(self.gamma)
            self.params.append(self.beta)
            self.active_params.append(self.gamma)
            self.active_params.append(self.beta)
            self.params.append(self.running_mean)  # inactive params
            self.params.append(self.running_var)  # inactive params

        self.L1 = abs(self.w).sum()
        # if batch_norm is True : self.L1 = self.L1 # + abs(self.gamma).sum()
        self.L2 = (self.w**2).sum()
        # if batch_norm is True: self.L2 = self.L2 # + (self.gamma**2).sum()

        # Stored here for the print_layer method to use.
        self.nkerns = nkerns
        self.filter_shape = filter_shape
        self.poolsize = poolsize
        self.stride = stride
        self.input_shape = input_shape
        self.num_neurons = nkerns
        self.activation = activation
        self.batch_norm = batch_norm
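A sketch of how the deconv_layer_2d above might be used to grow a feature map, with a placeholder import path and illustrative shapes. Note that output_shape is indexed as (height, width, channels) in the constructor.

import theano
import theano.tensor as T
# from my_layers import deconv_layer_2d      # placeholder import path

feats = T.tensor4('feats')                   # (batch, channels, height, width)
deconv1 = deconv_layer_2d(input=feats,
                          nkerns=32,
                          input_shape=(64, 32, 14, 14),
                          id='deconv1',
                          output_shape=(16, 16, 16),   # 3x3 valid deconv: 14x14 -> 16x16
                          filter_shape=(3, 3),
                          stride=(1, 1),
                          batch_norm=False)

upsample = theano.function([feats], deconv1.output)
print deconv1.output_shape                   # (64, 16, 16, 16)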
Example #5
    def __init__ (self,
                  input,
                  num_neurons,
                  input_shape,
                  id,
                  rng = None,
                  input_params = None,
                  borrow = True,
                  activation = 'relu',
                  batch_norm = True,
                  verbose = 2 ):
        super(dot_product_layer,self).__init__(id = id, type = 'dot_product', verbose = verbose)
        if verbose >= 3:
            print "... Creating dot product layer"

        if rng is None:
            rng = numpy.random

        if input_params is None:
            w_values = numpy.asarray(0.01 * rng.standard_normal(
                size=(input_shape[1], num_neurons)), dtype=theano.config.floatX)
            if activation == 'sigmoid':
                w_values*=4 
            self.w = theano.shared(value=w_values, name='w')
            b_values = numpy.zeros((num_neurons,), dtype=theano.config.floatX)
            self.b = theano.shared(value=b_values, name='b')
            if batch_norm is True:
                alpha_values = numpy.ones((num_neurons,), dtype = theano.config.floatX)
                self.alpha = theano.shared(value = alpha_values, name = 'alpha')
            
        else:
            self.w = input_params[0]
            self.b = input_params[1]
            if batch_norm is True:
                self.alpha = input_params[2]  

        dot_product = T.dot(input, self.w)
                
        if batch_norm is True:
            std = dot_product.std( 0 )
            mean = dot_product.mean( 0 )
            std += 0.001  # fudge factor to avoid division by zero
        
            dot_product = dot_product - mean 
            dot_product = dot_product * ( self.alpha / std ) 
            
        dot_product = dot_product  + self.b
        dot_product_shp = (input_shape[0], num_neurons)
        self.output, self.output_shape = _activate (x= dot_product,
                                            activation = activation,
                                            input_size = dot_product_shp,
                                            verbose = verbose,
                                            dimension = 1)   
            
        # parameters of the model
        if batch_norm is True:
            self.params = [self.w, self.b, self.alpha]
        else:
            self.params = [self.w, self.b]
                
        self.L1 = abs(self.w).sum()  
        if batch_norm is True: self.L1 = self.L1 + abs(self.alpha).sum()
        self.L2 = (self.w**2).sum()  
        if batch_norm is True: self.L2 = self.L2 + (self.alpha**2).sum()
        
        if verbose >=3: 
            print "... Dot Product layer is created with output shape " + str(self.output_shape)        

        self.num_neurons = num_neurons
        self.activation = activation
        self.batch_norm = batch_norm
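The two dot_product_layer variants above (Examples #1 and #5) implement batch normalisation by hand rather than through Theano's bn ops. A small NumPy restatement of that arithmetic, purely for illustration:

import numpy

batch = numpy.random.rand(64, 256).astype('float32')   # (mini_batch, num_neurons)
alpha = numpy.ones(256, dtype='float32')                # learned scale, initialised to 1
bias = numpy.zeros(256, dtype='float32')                # learned shift

mean = batch.mean(0)
std = batch.std(0) + 0.001        # same fudge factor as the code above
normed = (batch - mean) * (alpha / std) + bias

print normed.mean(0)[:3], normed.std(0)[:3]   # roughly 0 and 1 per feature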
Example #6
    def __init__(
        self,
        input,
        nkerns,
        input_shape,
        id,
        filter_shape=(3, 3),
        poolsize=(2, 2),
        pooltype='max',
        batch_norm=False,
        border_mode='valid',
        stride=(1, 1),
        rng=None,
        borrow=True,
        activation='relu',
        input_params=None,
        verbose=2,
    ):

        super(conv_pool_layer_2d, self).__init__(id=id,
                                                 type='conv_pool',
                                                 verbose=verbose)
        if verbose >= 3:
            print "... Creating conv pool layer"

        if rng is None:
            rng = numpy.random

        # To copy previously created weights or apply custom initializations
        if input_params is not None:
            init_w = input_params[0]
            init_b = input_params[1]
            if batch_norm is True:
                init_alpha = input_params[2]

        mini_batch_size = input_shape[0]
        channels = input_shape[1]
        width = input_shape[3]
        height = input_shape[2]
        # srng = RandomStreams(rng.randint(1,2147462579))
        # Initialize the parameters of this layer.
        w_shp = (nkerns, channels, filter_shape[0], filter_shape[1])
        if input_params is None:
            # Glorot-style uniform initialization bound, as used in the Theano
            # deep learning (LeNet) tutorial.
            fan_in = filter_shape[0] * filter_shape[1]
            fan_out = filter_shape[0] * filter_shape[1] / numpy.prod(poolsize)
            w_bound = numpy.sqrt(6. / (fan_in + fan_out))
            self.w = theano.shared(value=numpy.asarray(
                rng.uniform(low=-w_bound, high=w_bound, size=w_shp),
                dtype=theano.config.floatX),
                                   borrow=borrow,
                                   name='filterbank')
            self.b = theano.shared(value=numpy.zeros(
                (w_shp[0]), dtype=theano.config.floatX),
                                   name='bias',
                                   borrow=borrow)
            self.alpha = theano.shared(value=numpy.ones(
                (w_shp[0]), dtype=theano.config.floatX),
                                       name='batchnorm',
                                       borrow=borrow)
        else:
            self.w = init_w
            self.b = init_b
            if batch_norm is True:
                self.alpha = init_alpha

        # Perform the convolution part
        convolver = convolver_2d(input=input,
                                 filters=self.w,
                                 subsample=stride,
                                 filter_shape=w_shp,
                                 image_shape=input_shape,
                                 border_mode=border_mode,
                                 verbose=verbose)

        conv_out = convolver.out
        conv_out_shp = (mini_batch_size, nkerns, convolver.out_shp[0],
                        convolver.out_shp[1])

        self.conv_out = conv_out
        if not poolsize == (1, 1):
            pooler = pooler_2d(input=conv_out,
                               img_shp=conv_out_shp,
                               mode=pooltype,
                               ds=poolsize,
                               verbose=verbose)
            pool_out = pooler.out
            pool_out_shp = pooler.out_shp
        else:
            pool_out = conv_out
            pool_out_shp = conv_out_shp
        """
        Ioffe, Sergey, and Christian Szegedy. "Batch normalization: Accelerating deep network
        training by reducing internal covariate shift." arXiv preprint arXiv:1502.03167 (2015). """
        if batch_norm is True:
            mean = pool_out.mean((0, 2, 3), keepdims=True)
            std = pool_out.std((0, 2, 3), keepdims=True)
            std += 0.001  # fudge factor to avoid division by zero
            pool_out = pool_out - mean
            # use one bias for both batch norm and regular bias.
            batch_norm_out = pool_out * ( self.alpha.dimshuffle('x', 0, 'x', 'x') / std ) + \
                                                        self.b.dimshuffle('x', 0, 'x', 'x')
        else:
            batch_norm_out = pool_out + self.b.dimshuffle('x', 0, 'x', 'x')

        batch_norm_out_shp = pool_out_shp
        self.output, self.output_shape = _activate(
            x=batch_norm_out,
            activation=activation,
            input_size=batch_norm_out_shp,
            verbose=verbose,
            dimension=2)

        # store parameters of this layer and do some bookkeeping.
        self.params = [self.w, self.b]
        if batch_norm is True:
            self.params.append(self.alpha)

        self.L1 = abs(self.w).sum()
        if batch_norm is True: self.L1 = self.L1 + abs(self.alpha).sum()
        self.L2 = (self.w**2).sum()
        if batch_norm is True: self.L2 = self.L2 + (self.alpha**2).sum()

        # Stored here for the print_layer method to use.
        self.nkerns = nkerns
        self.filter_shape = filter_shape
        self.poolsize = poolsize
        self.stride = stride
        self.input_shape = input_shape
        self.num_neurons = nkerns
        self.activation = activation
        self.batch_norm = batch_norm
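For reference, the Glorot-style bound computed in the initialization branch above works out as follows for the default 3x3 filter and 2x2 pooling; this standalone snippet simply mirrors the arithmetic of the code.

import numpy

filter_shape = (3, 3)
poolsize = (2, 2)

fan_in = filter_shape[0] * filter_shape[1]                           # 9
fan_out = filter_shape[0] * filter_shape[1] / numpy.prod(poolsize)   # 9 / 4 -> 2 under integer division
w_bound = numpy.sqrt(6. / (fan_in + fan_out))                        # sqrt(6 / 11) ~ 0.739

print w_bound   # weights are drawn uniformly from [-w_bound, w_bound]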
Example #7
    def __init__ ( self,                    
                   input,
                   nkerns,
                   input_shape,   
                   id,                
                   filter_shape = (3,3),                   
                   poolsize = (2,2),
                   pooltype = 'max',
                   batch_norm = False,                   
                   border_mode = 'valid',  
                   stride = (1,1),
                   rng = None,
                   borrow = True,
                   activation = 'relu',
                   input_params = None,                   
                   verbose = 2,
                 ):

        super(conv_pool_layer_2d,self).__init__(id = id, type = 'conv_pool', verbose = verbose)                
        if verbose >=3: 
            print "... Creating conv pool layer"

        if rng is None:
            rng = numpy.random

        # To copy previously created weights or apply custom initializations
        if input_params is not None:
            init_w = input_params[0]
            init_b = input_params[1]
            if batch_norm is True:
                init_alpha = input_params[2]

        mini_batch_size  = input_shape[0]
        channels   = input_shape[1] 
        width      = input_shape[3]
        height     = input_shape[2]
        # srng = RandomStreams(rng.randint(1,2147462579))
        # Initialize the parameters of this layer.
        w_shp = (nkerns, channels, filter_shape[0], filter_shape[1])        
        if input_params is None:
            # Glorot-style uniform initialization bound, as used in the Theano
            # deep learning (LeNet) tutorial.
            fan_in = filter_shape[0]*filter_shape[1]
            fan_out = filter_shape[0]*filter_shape[1] / numpy.prod(poolsize)        
            w_bound = numpy.sqrt(6. / (fan_in + fan_out))          
            self.w = theano.shared(value=
                   numpy.asarray(rng.uniform(low=-w_bound, high=w_bound, size =w_shp),
                                    dtype=theano.config.floatX ), borrow=borrow, name ='w' )
            self.b = theano.shared(value=numpy.zeros((w_shp[0]), dtype=theano.config.floatX),
                                     name = 'b', borrow=borrow)  
            self.alpha = theano.shared(value=numpy.ones((w_shp[0]), 
                                 dtype=theano.config.floatX), name = 'alpha', borrow = borrow)                                                                                                                    
        else:
            self.w = init_w
            self.b = init_b
            if batch_norm is True:
                self.alpha = init_alpha

        # Perform the convolution part
        convolver  = convolver_2d (
                        input = input,
                        filters = self.w,
                        subsample = stride,
                        filter_shape = w_shp,
                        image_shape = input_shape,
                        border_mode = border_mode,
                        verbose = verbose
                           )        

        conv_out = convolver.out
        conv_out_shp = (mini_batch_size, nkerns, convolver.out_shp[0], convolver.out_shp[1])   

        self.conv_out = conv_out
        if not poolsize == (1,1):
             pooler = pooler_2d( 
                                input = conv_out,
                                img_shp = conv_out_shp,
                                mode = pooltype, 
                                ds = poolsize,
                                verbose = verbose
                            )
             pool_out = pooler.out
             pool_out_shp = pooler.out_shp           
        else:
            pool_out = conv_out
            pool_out_shp = conv_out_shp

        if batch_norm is True:
            mean = pool_out.mean( (0,2,3), keepdims = True )
            std = pool_out.std( (0,2,3), keepdims = True )            
            std += 0.001  # fudge factor to avoid division by zero
            pool_out = pool_out - mean
            # use one bias for both batch norm and regular bias.
            batch_norm_out = pool_out * ( self.alpha.dimshuffle('x', 0, 'x', 'x') / std ) + \
                                                        self.b.dimshuffle('x', 0, 'x', 'x')
        else:
            batch_norm_out = pool_out + self.b.dimshuffle('x', 0, 'x', 'x')            
        
        batch_norm_out_shp = pool_out_shp        
        self.output, self.output_shape = _activate (x= batch_norm_out,
                                            activation = activation,
                                            input_size = batch_norm_out_shp,
                                            verbose = verbose,
                                            dimension = 2)
         
        # store parameters of this layer and do some bookkeeping.
        self.params = [self.w, self.b] 
        if batch_norm is True: 
            self.params.append(self.alpha)        

        self.L1 = abs(self.w).sum() 
        if batch_norm is True : self.L1 = self.L1 + abs(self.alpha).sum()
        self.L2 = (self.w**2).sum() 
        if batch_norm is True: self.L2 = self.L2 + (self.alpha**2).sum()

        # Stored here for the print_layer method to use.
        self.nkerns = nkerns
        self.filter_shape = filter_shape
        self.poolsize = poolsize
        self.stride = stride
        self.input_shape = input_shape
        self.num_neurons = nkerns
        self.activation = activation
        self.batch_norm = batch_norm
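The dimshuffle('x', 0, 'x', 'x') calls above reshape per-channel vectors so they broadcast across a 4-d feature map. A NumPy analogue of that per-channel batch-norm step, for illustration only:

import numpy

fmap = numpy.random.rand(8, 20, 12, 12).astype('float32')   # (batch, nkerns, height, width)
alpha = numpy.ones(20, dtype='float32')
bias = numpy.zeros(20, dtype='float32')

mean = fmap.mean((0, 2, 3), keepdims=True)
std = fmap.std((0, 2, 3), keepdims=True) + 0.001

# reshape(1, 20, 1, 1) plays the role of dimshuffle('x', 0, 'x', 'x')
out = (fmap - mean) * (alpha.reshape(1, 20, 1, 1) / std) + bias.reshape(1, 20, 1, 1)
print out.shape   # (8, 20, 12, 12)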
Example #8
    def __init__(self,
                 input,
                 num_neurons,
                 input_shape,
                 id,
                 rng=None,
                 input_params=None,
                 borrow=True,
                 activation='relu',
                 batch_norm=True,
                 verbose=2):
        super(dot_product_layer, self).__init__(id=id,
                                                type='dot_product',
                                                verbose=verbose)
        if verbose >= 3:
            print "... Creating dot product layer"

        if rng is None:
            rng = numpy.random

        create = False
        if input_params is None:
            create = True
        elif input_params[0] is None:
            create = True
        if create is True:
            w_values = numpy.asarray(
                0.01 * rng.standard_normal(size=(input_shape[1], num_neurons)),
                dtype=theano.config.floatX)
            if activation == 'sigmoid':
                w_values *= 4
            self.w = theano.shared(value=w_values, name='weights')
        else:
            self.w = input_params[0]

        create = False
        if input_params is None:
            create = True
        elif input_params[1] is None:
            create = True
        if create is True:
            b_values = numpy.zeros((num_neurons, ), dtype=theano.config.floatX)
            self.b = theano.shared(value=b_values, name='bias')
        else:
            self.b = input_params[1]

        if batch_norm is True:
            create = False
            if input_params is None:
                create = True
            elif input_params[2] is None:
                create = True
            if create is True:
                gamma_values = numpy.ones((1, num_neurons),
                                          dtype=theano.config.floatX)
                self.gamma = theano.shared(value=gamma_values, name='gamma')
                beta_values = numpy.zeros((1, num_neurons),
                                          dtype=theano.config.floatX)
                self.beta = theano.shared(value=beta_values, name='beta')
                self.running_mean = theano.shared(value=numpy.zeros(
                    (1, num_neurons), dtype=theano.config.floatX),
                                                  name='population_mean',
                                                  borrow=borrow)
                self.running_var = theano.shared(value=numpy.ones(
                    (1, num_neurons), dtype=theano.config.floatX),
                                                 name='population_var',
                                                 borrow=borrow)
            else:
                self.gamma = input_params[2]
                self.beta = input_params[3]
                self.running_mean = input_params[4]
                self.running_var = input_params[5]

        linear_fit = T.dot(input, self.w) + self.b

        if batch_norm is True:
            batch_norm_out, _, _, mean, var = batch_normalization_train(
                inputs=linear_fit,
                gamma=self.gamma,
                beta=self.beta,
                running_mean=self.running_mean,
                running_var=self.running_var)

            mean = theano.tensor.unbroadcast(mean, 0)
            var = theano.tensor.unbroadcast(var, 0)
            self.updates[self.running_mean] = mean
            self.updates[self.running_var] = var + 0.001

            batch_norm_inference = batch_normalization_test(
                inputs=linear_fit,
                gamma=self.gamma,
                beta=self.beta,
                mean=self.running_mean,
                var=self.running_var)
        else:
            batch_norm_out = linear_fit
            batch_norm_inference = batch_norm_out

        batch_norm_shp = (input_shape[0], num_neurons)
        self.output, self.output_shape = _activate(x=batch_norm_out,
                                                   activation=activation,
                                                   input_size=batch_norm_shp,
                                                   verbose=verbose,
                                                   dimension=1)

        self.inference, _ = _activate(x=batch_norm_inference,
                                      activation=activation,
                                      input_size=batch_norm_shp,
                                      verbose=verbose,
                                      dimension=1)

        # parameters of the model
        if batch_norm is True:
            self.params = [
                self.w, self.b, self.gamma, self.beta, self.running_mean,
                self.running_var
            ]
            self.active_params = [self.w, self.b, self.gamma, self.beta]
        else:
            self.params = [self.w, self.b]
            self.active_params = [self.w, self.b]

        self.L1 = abs(self.w).sum()
        # if batch_norm is True: self.L1 = self.L1 + abs(self.gamma).sum()
        self.L2 = (self.w**2).sum()
        # if batch_norm is True: self.L2 = self.L2 + (self.gamma**2).sum()
        """
        Ioffe, Sergey, and Christian Szegedy. "Batch normalization: Accelerating deep network
        training by reducing internal covariate shift." arXiv preprint arXiv:1502.03167 (2015). """

        if verbose >= 3:
            print "... Dot Product layer is created with output shape " + str(
                self.output_shape)

        self.num_neurons = num_neurons
        self.activation = activation
        self.batch_norm = batch_norm
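Finally, a sketch of how this batch-normalised dot_product_layer variant might be driven, with a placeholder import path and illustrative shapes. It assumes the base layer class provides the self.updates dictionary populated above: the training graph (self.output) needs those updates applied so the population statistics track the data, while self.inference reads the stored running mean and variance.

import numpy
import theano
import theano.tensor as T
# from my_layers import dot_product_layer    # placeholder import path

x = T.matrix('x')
fc = dot_product_layer(input=x,
                       num_neurons=512,
                       input_shape=(128, 1024),
                       id='fc_bn',
                       batch_norm=True)

train_fwd = theano.function([x], fc.output, updates=fc.updates)
test_fwd = theano.function([x], fc.inference)

batch = numpy.random.rand(128, 1024).astype(theano.config.floatX)
print train_fwd(batch).shape, test_fwd(batch).shape   # both (128, 512)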