Example #1
0
def test_batch_normalization_test():
    """Compare ``bn.batch_normalization_test`` against a hand-built reference.

    For every axis specification ('per-activation', 'spatial', explicit
    tuple) and tensor rank (5-d down to 1-d), builds both the library op
    and an explicit broadcast/normalize expression, then numerically
    compares the forward output and the gradients w.r.t. all five inputs.
    Relies on module-level ``theano``/``T``/``bn``/``numpy``/``utt``.
    """
    for axes in ('per-activation', 'spatial', (1, 2, 3, 4)):
        for vartype in (T.tensor5, T.tensor4, T.tensor3, T.matrix, T.vector):
            x, scale, bias, mean, var = (vartype(n)
                                         for n in ('x', 'scale', 'bias', 'mean', 'var'))
            ndim = x.ndim
            eps = 5e-3  # some non-standard value to test if it's used

            # remove non-existing axes
            if isinstance(axes, tuple):
                axes = tuple(i for i in axes if i < ndim)
            if len(axes) == 0:
                continue

            # forward pass
            out = bn.batch_normalization_test(x, scale, bias, mean,
                                              var, axes, eps)
            # reference forward pass: resolve the symbolic axis spec into an
            # explicit tuple so the parameters can be broadcast by hand
            if axes == 'per-activation':
                axes2 = (0,)
            elif axes == 'spatial':
                axes2 = (0,) + tuple(range(2, ndim))
            else:
                axes2 = axes
            scale2, bias2, mean2, var2 = (T.addbroadcast(t, *axes2)
                                          for t in (scale, bias, mean, var))
            out2 = (x - mean2) * (scale2 / T.sqrt(var2 + eps)) + bias2
            # backward pass
            dy = vartype('dy')
            grads = T.grad(None, wrt=[x, scale, bias, mean, var], known_grads={out: dy})
            # reference backward pass
            grads2 = T.grad(None, wrt=[x, scale, bias, mean, var], known_grads={out2: dy})
            # compile
            f = theano.function([x, scale, bias, mean, var, dy],
                                [out, out2] + grads + grads2)
            # check if the abstract Ops have been replaced
            assert not any([isinstance(n.op, (bn.AbstractBatchNormTrain,
                                              bn.AbstractBatchNormInference,
                                              bn.AbstractBatchNormTrainGrad))
                            for n in f.maker.fgraph.toposort()])
            # run
            for data_shape in ((10, 20, 30, 40, 10), (4, 3, 1, 1, 1), (1, 1, 5, 5, 5)):
                data_shape = data_shape[:ndim]
                # parameter tensors have size 1 along the normalized axes
                param_shape = tuple(1 if d in axes2 else s
                                    for d, s in enumerate(data_shape))
                X = 4 + 3 * numpy.random.randn(*data_shape).astype(theano.config.floatX)
                Dy = -1 + 2 * numpy.random.randn(*data_shape).astype(theano.config.floatX)
                Scale = numpy.random.randn(*param_shape).astype(theano.config.floatX)
                Bias = numpy.random.randn(*param_shape).astype(theano.config.floatX)
                Mean = numpy.random.randn(*param_shape).astype(theano.config.floatX)
                Var = numpy.random.rand(*param_shape).astype(theano.config.floatX)
                outputs = f(X, Scale, Bias, Mean, Var, Dy)
                # compare outputs
                utt.assert_allclose(outputs[0], outputs[1])  # out
                # compare gradients (outputs[2:7] are the op's grads,
                # outputs[7:12] the reference grads, in the same order)
                utt.assert_allclose(outputs[2], outputs[2 + 5], atol=4e-5)  # dx
                utt.assert_allclose(outputs[3], outputs[3 + 5], atol=4e-5)  # dscale
                utt.assert_allclose(outputs[4], outputs[4 + 5])  # dbias
                utt.assert_allclose(outputs[5], outputs[5 + 5])  # dmean
                utt.assert_allclose(outputs[6], outputs[6 + 5], rtol=2e-3, atol=4e-5)  # dvar
Example #2
0
def batch_norm(input_,
               gamma,
               beta,
               running_mean,
               running_var,
               is_training,
               axes='per-activation',
               running_average_factor=0.9):
    """Apply batch normalization in training or inference mode.

    Parameters
    ----------
    input_ : symbolic tensor to normalize.
    gamma, beta : scale and shift parameters.
    running_mean, running_var : population statistics (shared/symbolic).
    is_training : bool; selects the train or test graph.
    axes : axis specification forwarded to the underlying op.
    running_average_factor : weight of the new batch statistics in the
        running averages (previously hard-coded to 0.9; the default keeps
        the old behavior).

    Returns
    -------
    (out, running_mean, running_var) : normalized output plus the
    (possibly updated) running statistics. At inference time the running
    statistics are returned unchanged.
    """
    if is_training:
        # batch_normalization_train returns:
        #   batch-normalized output
        #   batch mean
        #   batch variance
        #   running mean (for later use as population mean estimate)
        #   running var (for later use as population var estimate)
        out, _, _, running_mean, running_var = batch_normalization_train(
            input_,
            gamma,
            beta,
            running_mean=running_mean,
            running_var=running_var,
            axes=axes,
            running_average_factor=running_average_factor,
        )
    else:
        out = batch_normalization_test(
            input_,
            gamma,
            beta,
            running_mean,
            running_var,
            axes=axes,
        )
    return out, running_mean, running_var
Example #3
0
def test_batch_normalization_test():
    """Compare ``bn.batch_normalization_test`` against a hand-built reference.

    Duplicate of the earlier example: for every axis specification and
    tensor rank, builds the library op and an explicit reference
    expression, then numerically compares the forward output and the
    gradients w.r.t. all five inputs.
    """
    for axes in ('per-activation', 'spatial', (1, 2, 3, 4)):
        for vartype in (T.tensor5, T.tensor4, T.tensor3, T.matrix, T.vector):
            x, scale, bias, mean, var = (vartype(n)
                                         for n in ('x', 'scale', 'bias', 'mean', 'var'))
            ndim = x.ndim
            eps = 5e-3  # some non-standard value to test if it's used

            # remove non-existing axes
            if isinstance(axes, tuple):
                axes = tuple(i for i in axes if i < ndim)
            if len(axes) == 0:
                continue

            # forward pass
            out = bn.batch_normalization_test(x, scale, bias, mean,
                                              var, axes, eps)
            # reference forward pass: resolve the axis spec to a tuple
            if axes == 'per-activation':
                axes2 = (0,)
            elif axes == 'spatial':
                axes2 = (0,) + tuple(range(2, ndim))
            else:
                axes2 = axes
            scale2, bias2, mean2, var2 = (T.addbroadcast(t, *axes2)
                                          for t in (scale, bias, mean, var))
            out2 = (x - mean2) * (scale2 / T.sqrt(var2 + eps)) + bias2
            # backward pass
            dy = vartype('dy')
            grads = T.grad(None, wrt=[x, scale, bias, mean, var], known_grads={out: dy})
            # reference backward pass
            grads2 = T.grad(None, wrt=[x, scale, bias, mean, var], known_grads={out2: dy})
            # compile
            f = theano.function([x, scale, bias, mean, var, dy],
                                [out, out2] + grads + grads2)
            # check if the abstract Ops have been replaced
            assert not any([isinstance(n.op, (bn.AbstractBatchNormTrain,
                                              bn.AbstractBatchNormInference,
                                              bn.AbstractBatchNormTrainGrad))
                            for n in f.maker.fgraph.toposort()])
            # run
            for data_shape in ((10, 20, 30, 40, 10), (4, 3, 1, 1, 1), (1, 1, 5, 5, 5)):
                data_shape = data_shape[:ndim]
                # parameter tensors have size 1 along the normalized axes
                param_shape = tuple(1 if d in axes2 else s
                                    for d, s in enumerate(data_shape))
                X = 4 + 3 * numpy.random.randn(*data_shape).astype(theano.config.floatX)
                Dy = -1 + 2 * numpy.random.randn(*data_shape).astype(theano.config.floatX)
                Scale = numpy.random.randn(*param_shape).astype(theano.config.floatX)
                Bias = numpy.random.randn(*param_shape).astype(theano.config.floatX)
                Mean = numpy.random.randn(*param_shape).astype(theano.config.floatX)
                Var = numpy.random.rand(*param_shape).astype(theano.config.floatX)
                outputs = f(X, Scale, Bias, Mean, Var, Dy)
                # compare outputs
                utt.assert_allclose(outputs[0], outputs[1])  # out
                # compare gradients (op grads at [2:7], reference at [7:12])
                utt.assert_allclose(outputs[2], outputs[2 + 5], atol=4e-5)  # dx
                utt.assert_allclose(outputs[3], outputs[3 + 5], atol=4e-5)  # dscale
                utt.assert_allclose(outputs[4], outputs[4 + 5])  # dbias
                utt.assert_allclose(outputs[5], outputs[5 + 5])  # dmean
                utt.assert_allclose(outputs[6], outputs[6 + 5], rtol=2e-3, atol=4e-5)  # dvar
Example #4
0
def batchNorm(x, train, gamma, beta, RM, RV, ax):
    """Build a batch-norm graph that switches on the symbolic ``train`` flag.

    Always constructs the training graph (to obtain the updated running
    statistics), then uses a lazy ``ifelse`` to select the inference
    graph whenever ``train`` != 1.

    Returns (normalized output, new running mean, new running var).
    """
    train_result = batch_normalization_train(
        x, gamma, beta, axes=ax, running_mean=RM, running_var=RV)
    normed_train = train_result[0]
    newRM = train_result[3]
    newRV = train_result[4]
    normed_test = batch_normalization_test(x, gamma, beta, RM, RV, axes=ax)
    # ifelse evaluates lazily: only one branch runs at execution time
    selected = ifelse(T.neq(train, 1), normed_test, normed_train)
    return selected, newRM, newRV
    def forward(self, X, is_traning):
        """Affine transform, batch norm (train or test mode), then activation.

        Note: the parameter is spelled ``is_traning`` by the original
        author; it is kept unchanged so keyword callers still work.
        In training mode this also records the running-average updates
        in ``self.running_update`` for the caller to apply.
        """
        linear = X.dot(self.W)
        if not is_traning:
            # inference: normalize with the stored population statistics
            normed = batch_normalization_test(linear, self.gamma, self.beta,
                                              self.running_mean, self.running_var)
            return self.f(normed)

        bn_result = batch_normalization_train(
            linear,
            self.gamma,
            self.beta,
            running_mean=self.running_mean,
            running_var=self.running_var)
        normed = bn_result[0]
        # bn_result[1:3] are the batch mean / inverse std; [3:5] are the
        # updated running averages (equivalent to the manual
        # 0.9*old + 0.1*batch exponential-average update).
        self.running_update = [
            (self.running_mean, bn_result[3]),
            (self.running_var, bn_result[4]),
        ]
        return self.f(normed)
Example #6
0
 def forward(self, X, is_training):
     """Affine transform, batch norm, then the optional activation ``self.af``.

     In training mode the running-average updates are exposed via
     ``self.running_update`` so the caller can apply them. Returns the
     raw normalized output when no activation is configured.
     """
     activation = X.dot(self.W)
     if is_training:
         out, b_mean, b_invstd, new_mean, new_var = batch_normalization_train(
             activation,
             self.gamma,
             self.beta,
             running_mean=self.running_mean,
             running_var=self.running_var
         )
         # write the update rules of mean and var in the layer so they can
         # be applied from outside (e.g. in the theano.function updates)
         self.running_update = [
             (self.running_mean, new_mean),
             (self.running_var, new_var)
         ]
     else:
         out = batch_normalization_test(
             activation,
             self.gamma,
             self.beta,
             self.running_mean,
             self.running_var
         )

     # bug fix: use identity comparison for None ("x == None" is unidiomatic
     # and can misbehave with objects overriding __eq__, e.g. symbolic tensors)
     if self.af is None:
         return out
     return self.af(out)
 def forward(self, Z, is_training):
     """Affine transform followed by batch norm, then the layer activation.

     Training mode additionally stores the running-average updates in
     ``self.running_update`` for the caller to apply.
     """
     pre_activation = Z.dot(self.W)
     if not is_training:
         # inference path: use the stored population statistics
         normed = batch_normalization_test(pre_activation, self.gamma, self.beta,
                                           self.rn_mean, self.rn_var)
         return self.f(normed)

     bn_result = batch_normalization_train(
         pre_activation,
         self.gamma,
         self.beta,
         running_mean=self.rn_mean,
         running_var=self.rn_var)
     normed = bn_result[0]
     # bn_result[3] / [4] are the updated running mean and variance
     self.running_update = [(self.rn_mean, bn_result[3]),
                            (self.rn_var, bn_result[4])]
     return self.f(normed)
Example #8
0
def test_batch_normalization_broadcastable():
    """Check the optimizer preserves an all-broadcastable pattern.

    Builds 5-d variables whose every dimension is broadcastable, runs the
    train/test batch-norm ops plus their gradients through compilation,
    and asserts no abstract batch-norm op survives optimization.
    """
    # six scalars dimshuffled into rank-5 tensors with all-broadcastable dims
    names = ('x', 'dy', 'scale', 'bias', 'mean', 'var')
    x, dy, scale, bias, mean, var = [T.scalar(name).dimshuffle(['x'] * 5)
                                     for name in names]

    # forward graphs for both training and inference
    out_train, x_mean, x_invstd = bn.batch_normalization_train(x, scale, bias, 'spatial')
    out_test = bn.batch_normalization_test(x, scale, bias, mean, var, 'spatial')
    # gradient graphs
    grads_train = T.grad(None, wrt=[x, scale, bias], known_grads={out_train: dy})
    grads_test = T.grad(None, wrt=[x, scale, bias], known_grads={out_test: dy})
    # compile everything into a single function
    outputs = [out_train, x_mean, x_invstd, out_test] + grads_train + grads_test
    f = theano.function([x, scale, bias, mean, var, dy], outputs)
    # after optimization, no abstract batch-norm op may remain in the graph
    abstract_ops = (bn.AbstractBatchNormTrain,
                    bn.AbstractBatchNormInference,
                    bn.AbstractBatchNormTrainGrad)
    assert all(not isinstance(node.op, abstract_ops)
               for node in f.maker.fgraph.toposort())
  def forward(self, X, is_training):
    """Affine transform, batch norm (train/test), then the layer activation.

    Training mode records the running-average updates in
    ``self.running_update`` so the training function can apply them.
    """
    activation = X.dot(self.W)
    if is_training:
      # batch_normalization_train returns, in order:
      #   normalized output, batch mean, batch inverse std,
      #   updated running mean, updated running var
      bn_result = batch_normalization_train(
        activation,
        self.gamma,
        self.beta,
        running_mean=self.running_mean,
        running_var=self.running_var,
      )
      out = bn_result[0]
      new_running_mean = bn_result[3]
      new_running_var = bn_result[4]

      # expose the shared-variable updates for the caller; equivalent to
      # the manual exponential average 0.9*old + 0.1*batch
      self.running_update = [
        (self.running_mean, new_running_mean),
        (self.running_var, new_running_var),
      ]
    else:
      out = batch_normalization_test(
        activation,
        self.gamma,
        self.beta,
        self.running_mean,
        self.running_var
      )
    return self.f(out)
def batch_norm(
  input_,
  gamma,
  beta,
  running_mean,
  running_var,
  is_training,
  axes='per-activation',
  running_average_factor=0.9):
  """Apply batch normalization in training or inference mode.

  Parameters
  ----------
  input_ : symbolic tensor to normalize.
  gamma, beta : scale and shift parameters.
  running_mean, running_var : population statistics.
  is_training : bool; selects the train or test graph.
  axes : axis specification forwarded to the underlying op.
  running_average_factor : weight of the batch statistics in the running
      averages (previously hard-coded to 0.9; default keeps old behavior).

  Returns
  -------
  (out, new_running_mean, new_running_var); the running values are None
  at inference time, where no update is produced.
  """
  if is_training:
    # batch_normalization_train returns:
    #   batch-normalized output
    #   batch mean
    #   batch variance
    #   running mean (for later use as population mean estimate)
    #   running var (for later use as population var estimate)
    out, _, _, new_running_mean, new_running_var = batch_normalization_train(
      input_,
      gamma,
      beta,
      running_mean=running_mean,
      running_var=running_var,
      axes=axes,
      running_average_factor=running_average_factor,
    )
  else:
    new_running_mean = None
    new_running_var = None # just to ensure we don't try to use them
    out = batch_normalization_test(
      input_,
      gamma,
      beta,
      running_mean,
      running_var,
      axes=axes,
    )
  return out, new_running_mean, new_running_var
Example #11
0
 def forward(self, prev_layer, train):
     """Run one dense layer: dropout, affine transform, optional batch
     norm, then the configured activation.

     NOTE(review): the dropout mask is sampled on every call, including
     when ``train`` is falsy — confirm whether inference should instead
     rescale activations by (1 - dropout_rate).
     """
     # sample a fresh binary dropout mask and apply it
     self.drop = self.rng.binomial(size=prev_layer.shape,
                                   p=1 - self.dropout_rate)
     prev_layer = prev_layer * self.drop
     self.Z = T.dot(prev_layer, self.weights)
     # idiom fix: truthiness instead of "== True" comparisons
     if self.batch_norm:
         if train:
             self.Z, _, _, self.n_running_mean, self.n_running_variance = batch_normalization_train(
                 self.Z,
                 self.gamma,
                 self.beta,
                 running_mean=self.running_mean,
                 running_var=self.running_variance)
             self.n_norm_params = [
                 self.n_running_mean, self.n_running_variance
             ]
         else:
             # NOTE(review): self.n_norm_params is deliberately left
             # untouched here (matching the original); the inference pass
             # relies on a prior training pass having set it — confirm.
             self.Z = batch_normalization_test(self.Z, self.gamma,
                                               self.beta, self.running_mean,
                                               self.running_variance)
     else:
         self.Z += self.biases
         self.n_norm_params = []
     if self.activation == 'relu':
         self.A = T.nnet.nnet.relu(self.Z)
     elif self.activation == 'sigmoid':
         self.A = T.nnet.nnet.sigmoid(self.Z)
     elif self.activation == 'tanh':
         # bug fix: 2*sigmoid(Z) - 1 equals tanh(Z/2), not tanh(Z)
         self.A = T.tanh(self.Z)
     elif self.activation == 'leaky_relu':
         self.A = T.nnet.nnet.relu(self.Z, alpha=0.1)
     elif self.activation == 'softmax':
         self.A = T.nnet.nnet.softmax(self.Z)
     else:
         raise ValueError('Activation Error')
     return self.A
Example #12
0
def test_batch_normalization_train_broadcast():
    """Check train/test batch norm is insensitive to parameter broadcasting.

    Builds each graph twice — once with compact (non-broadcasted)
    parameter tensors, once with dimshuffled broadcasted views — and
    asserts the optimizer collapses the sum of absolute differences to a
    constant zero, then verifies numerically on random inputs.
    """
    for axes in ('per-activation', 'spatial', (1, 2, 3, 4)):
        for vartype in (T.tensor5, T.tensor4, T.tensor3, T.matrix, T.vector):
            x = vartype('x')
            ndim = x.ndim
            eps = 5e-3  # some non-standard value to test if it's used
            running_average_factor = 0.3

            # remove non-existing axes
            if isinstance(axes, tuple):
                axes = tuple(i for i in axes if i < ndim)
            if len(axes) == 0:
                continue

            # convert axes to explicit list
            if axes == 'per-activation':
                axes2 = (0,)
            elif axes == 'spatial':
                axes2 = (0,) + tuple(range(2, ndim))
            else:
                axes2 = axes

            # compute axes for parameter tensors
            non_bc_axes = tuple(i for i in range(ndim) if i not in axes2)
            params_dimshuffle = ['x'] * ndim
            for i, axis in enumerate(non_bc_axes):
                params_dimshuffle[axis] = i

            # construct non-broadcasted parameter variables
            param_type = T.TensorType(x.dtype, (False,) * len(non_bc_axes))
            scale, bias, running_mean, running_var = (param_type(n)
                                                      for n in ('scale', 'bias',
                                                                'running_mean',
                                                                'running_var'))

            # broadcast parameter variables
            scale_bc = scale.dimshuffle(params_dimshuffle)
            bias_bc = bias.dimshuffle(params_dimshuffle)
            running_mean_bc = running_mean.dimshuffle(params_dimshuffle)
            running_var_bc = running_var.dimshuffle(params_dimshuffle)

            # batch_normalization_train with original, non-broadcasted variables
            train_non_bc = \
                bn.batch_normalization_train(
                    x, scale, bias, axes, eps,
                    running_average_factor, running_mean, running_var)
            # batch_normalization_train with broadcasted variables
            train_bc = \
                bn.batch_normalization_train(
                    x, scale_bc, bias_bc, axes, eps,
                    running_average_factor, running_mean_bc, running_var_bc)
            # dimshuffle the broadcasted results back to the compact layout
            # so both variants can be compared element-wise
            train_bc = tuple([train_bc[0]] +  # out
                             [r.dimshuffle(non_bc_axes) for r in train_bc[1:]])

            # batch_normalization_test with original, non-broadcasted variables
            test_non_bc = \
                bn.batch_normalization_test(
                    x, scale, bias, running_mean, running_var, axes, eps)
            # batch_normalization_test with broadcasted variables
            test_bc = \
                bn.batch_normalization_test(
                    x, scale_bc, bias_bc, running_mean_bc, running_var_bc, axes, eps)

            # subtract the results of the non-broadcasted and broadcasted calls
            results_non_bc = train_non_bc + (test_non_bc,)
            results_bc = train_bc + (test_bc,)
            results = [abs(r - r_bc) for (r, r_bc) in zip(results_non_bc, results_bc)]

            # compile to compute all differences
            f = theano.function([x, scale, bias, running_mean, running_var],
                                T.sum(sum(results)))

            # the paired ops are exactly the same, so the optimizer should have
            # collapsed the sum of differences to a constant zero
            nodes = f.maker.fgraph.toposort()
            if theano.config.mode != "FAST_COMPILE":
                assert len(nodes) == 1
                assert isinstance(nodes[0].op, theano.compile.DeepCopyOp)
            inputs = [numpy.asarray(numpy.random.rand(*((4,) * n)), x.dtype)
                      for n in [x.ndim, scale.ndim, bias.ndim,
                                running_mean.ndim, running_var.ndim]]
            assert 0.0 == f(*inputs)
Example #13
0
    def __init__(
        self,
        input,
        nkerns,
        input_shape,
        id,
        filter_shape=(3, 3),
        poolsize=(2, 2),
        pooltype='max',
        batch_norm=False,
        border_mode='valid',
        stride=(1, 1),
        rng=None,
        borrow=True,
        activation='relu',
        input_params=None,
        verbose=2,
    ):
        """Build a 2-d convolution + pooling layer with optional batch norm.

        Creates (or reuses, via ``input_params``) the filter bank, bias,
        and batch-norm parameters, then wires convolution -> pooling ->
        batch norm -> activation, producing both a training graph
        (``self.output``) and an inference graph (``self.inference``).
        Python 2 code (print statement).

        NOTE(review): ``self.updates`` is written below but its creation
        is not visible in this block — presumably initialized by the
        superclass; confirm.
        """
        super(conv_pool_layer_2d, self).__init__(id=id,
                                                 type='conv_pool',
                                                 verbose=verbose)
        if verbose >= 3:
            print "... Creating conv pool layer"

        if rng is None:
            rng = numpy.random

        # To copy weights previously created or some wierd initializations
        if input_params is not None:
            init_w = input_params[0]
            init_b = input_params[1]
            if batch_norm is True:
                init_gamma = input_params[2]
                init_beta = input_params[3]
                init_mean = input_params[4]
                init_var = input_params[5]

        mini_batch_size = input_shape[0]
        channels = input_shape[1]
        width = input_shape[3]
        height = input_shape[2]
        # srng = RandomStreams(rng.randint(1,2147462579))
        # Initialize the parameters of this layer.
        w_shp = (nkerns, channels, filter_shape[0], filter_shape[1])

        if input_params is None:
            # fan_in = filter_shape[0]*filter_shape[1]
            # fan_out = filter_shape[0]*filter_shape[1] / numpy.prod(poolsize)
            # w_bound = numpy.sqrt(6. / (fan_in + fan_out))
            self.w = theano.shared(
                value=
                # numpy.asarray(rng.uniform(low=-w_bound, high=w_bound, size =w_shp),
                numpy.asarray(0.01 * rng.standard_normal(size=w_shp),
                              dtype=theano.config.floatX),
                borrow=borrow,
                name='filterbank')
            self.b = theano.shared(value=numpy.zeros(
                (nkerns, ), dtype=theano.config.floatX),
                                   name='bias',
                                   borrow=borrow)
            if batch_norm is True:
                # gamma/beta start at identity (ones/zeros); running stats
                # start at the standard-normal assumption (mean 0, var 1)
                self.gamma = theano.shared(value=numpy.ones(
                    (nkerns, ), dtype=theano.config.floatX),
                                           name='gamma',
                                           borrow=borrow)
                self.beta = theano.shared(value=numpy.zeros(
                    (nkerns, ), dtype=theano.config.floatX),
                                          name='beta',
                                          borrow=borrow)
                self.running_mean = theano.shared(value=numpy.zeros(
                    (nkerns, ), dtype=theano.config.floatX),
                                                  name='population_mean',
                                                  borrow=borrow)
                self.running_var = theano.shared(value=numpy.ones(
                    (nkerns, ), dtype=theano.config.floatX),
                                                 name='population_var',
                                                 borrow=borrow)
        else:
            self.w = init_w
            self.b = init_b
            if batch_norm is True:
                self.gamma = init_gamma
                self.beta = init_beta
                self.running_mean = init_mean
                self.running_var = init_var

        # Perform the convolution part
        convolver = convolver_2d(input=input,
                                 filters=self.w,
                                 subsample=stride,
                                 filter_shape=w_shp,
                                 image_shape=input_shape,
                                 border_mode=border_mode,
                                 verbose=verbose)

        conv_out = convolver.out
        conv_out_shp = (mini_batch_size, nkerns, convolver.out_shp[0],
                        convolver.out_shp[1])

        self.conv_out = conv_out
        if not poolsize == (1, 1):
            pooler = pooler_2d(input=conv_out,
                               img_shp=conv_out_shp,
                               mode=pooltype,
                               ds=poolsize,
                               verbose=verbose)
            pool_out = pooler.out
            pool_out_shp = pooler.out_shp
        else:
            # poolsize (1, 1) is a no-op; skip the pooling stage entirely
            pool_out = conv_out
            pool_out_shp = conv_out_shp
        """
        Ioffe, Sergey, and Christian Szegedy. "Batch normalization: Accelerating deep network
        training by reducing internal covariate shift." arXiv preprint arXiv:1502.03167 (2015). """
        if batch_norm is True:
            batch_norm_out,_,_,mean,var = batch_normalization_train(
                                                  inputs = pool_out + \
                                                                self.b.dimshuffle('x', 0, 'x', 'x'),
                                                  gamma = self.gamma,
                                                  beta = self.beta,
                                                  axes ='spatial',
                                                  running_mean = self.running_mean,
                                                  running_var = self.running_var )

            # unbroadcast so the update matches the shared variables' type;
            # NOTE(review): the +0.001 on the variance update appears to be a
            # numerical-stability fudge — confirm it is intentional
            mean = theano.tensor.unbroadcast(mean, 0)
            var = theano.tensor.unbroadcast(var, 0)
            self.updates[self.running_mean] = mean
            self.updates[self.running_var] = var + 0.001

            batch_norm_inference = batch_normalization_test (
                                                    inputs = pool_out + \
                                                            self.b.dimshuffle('x', 0, 'x', 'x'),
                                                    gamma = self.gamma,
                                                    beta = self.beta,
                                                    axes = 'spatial',
                                                    mean = self.running_mean,
                                                    var = self.running_var )
        else:
            batch_norm_out = pool_out + self.b.dimshuffle('x', 0, 'x', 'x')
            batch_norm_inference = batch_norm_out

        batch_norm_out_shp = pool_out_shp
        self.output, self.output_shape = _activate(
            x=batch_norm_out,
            activation=activation,
            input_size=batch_norm_out_shp,
            verbose=verbose,
            dimension=2)

        self.inference, _ = _activate(x=batch_norm_inference,
                                      activation=activation,
                                      input_size=batch_norm_out_shp,
                                      verbose=verbose,
                                      dimension=2)
        # store parameters of this layer and do some book keeping.
        self.params = [self.w, self.b]
        self.active_params = [self.w, self.b]
        if batch_norm is True:
            self.params.append(self.gamma)
            self.params.append(self.beta)
            self.active_params.append(self.gamma)
            self.active_params.append(self.beta)
            self.params.append(self.running_mean)  # inactive params
            self.params.append(self.running_var)  # inactive params

        self.L1 = abs(self.w).sum()
        # if batch_norm is True : self.L1 = self.L1 # + abs(self.gamma).sum()
        self.L2 = (self.w**2).sum()
        # if batch_norm is True: self.L2 = self.L2 # + (self.gamma**2).sum()

        # Just doing this for print_layer method to use.
        self.nkerns = nkerns
        self.filter_shape = filter_shape
        self.poolsize = poolsize
        self.stride = stride
        self.input_shape = input_shape
        self.num_neurons = nkerns
        self.activation = activation
        self.batch_norm = batch_norm
Example #14
0
    def __init__(
        self,
        input,
        nkerns,
        input_shape,
        id,
        output_shape,
        filter_shape=(3, 3),
        poolsize=(1, 1),
        pooltype='max',
        batch_norm=False,
        border_mode='valid',
        stride=(1, 1),
        rng=None,
        borrow=True,
        activation='relu',
        input_params=None,
        verbose=2,
    ):

        super(deconv_layer_2d, self).__init__(id=id,
                                              type='deconv',
                                              verbose=verbose)
        if verbose >= 3:
            print "... Creating deconv layer"

        if rng is None:
            rng = numpy.random

        create_w = False
        create_b = False
        create_bn = False

        # To copy weights previously created or some wierd initializations
        if not input_params is None:
            if input_params[0] is None:
                create_w = True
            if input_params[1] is None:
                create_b = True
            if batch_norm is True:
                if input_params[2] is None:
                    create_bn = True
        else:
            create_w = True
            create_b = True
            create_bn = True

        mini_batch_size = input_shape[0]
        channels = input_shape[1]
        width = input_shape[3]
        height = input_shape[2]
        # srng = RandomStreams(rng.randint(1,2147462579))
        # Initialize the parameters of this layer.

        w_shp = (nkerns, output_shape[2], filter_shape[0], filter_shape[1])
        o_shp = (input_shape[0], output_shape[2], output_shape[0],
                 output_shape[1])

        if create_w is True:
            self.w = theano.shared(value=numpy.asarray(
                0.01 * rng.standard_normal(size=w_shp),
                dtype=theano.config.floatX),
                                   borrow=borrow,
                                   name='filterbank')
        else:
            self.w = input_params[0]

        if create_b is True:
            self.b = theano.shared(value=numpy.zeros(
                (output_shape[2], ), dtype=theano.config.floatX),
                                   name='bias',
                                   borrow=borrow)
        else:
            self.b = input_params[1]

        if batch_norm is True:
            if create_bn is True:
                self.gamma = theano.shared(value=numpy.ones(
                    (output_shape[2], ), dtype=theano.config.floatX),
                                           name='gamma',
                                           borrow=borrow)
                self.beta = theano.shared(value=numpy.zeros(
                    (output_shape[2], ), dtype=theano.config.floatX),
                                          name='beta',
                                          borrow=borrow)
                self.running_mean = theano.shared(value=numpy.zeros(
                    (output_shape[2], ), dtype=theano.config.floatX),
                                                  name='population_mean',
                                                  borrow=borrow)
                self.running_var = theano.shared(value=numpy.ones(
                    (output_shape[2], ), dtype=theano.config.floatX),
                                                 name='population_var',
                                                 borrow=borrow)
            else:
                self.gamma = input_params[2]
                self.beta = input_params[3]
                self.running_mean = input_params[4]
                self.running_var = input_params[5]

        # Perform the convolution part
        convolver = deconvolver_2d(input=input,
                                   filters=self.w,
                                   output_shape=o_shp,
                                   subsample=stride,
                                   filter_shape=w_shp,
                                   image_shape=input_shape,
                                   border_mode=border_mode,
                                   verbose=verbose)

        conv_out = convolver.out
        conv_out_shp = o_shp

        self.conv_out = conv_out
        if not poolsize == (1, 1):
            raise Exception(
                " Unpool operation not yet supported be deconv layer")
            """ #pragma: no cover
             pooler = pooler_2d(
                                input = conv_out,
                                img_shp = conv_out_shp,
                                mode = pooltype,
                                ds = poolsize,
                                verbose = verbose
                            )
             pool_out = pooler.out
             pool_out_shp = pooler.out_shp
             """
        else:
            unpool_out = conv_out
            unpool_out_shp = conv_out_shp
        """
        Ioffe, Sergey, and Christian Szegedy. "Batch normalization: Accelerating deep network
        training by reducing internal covariate shift." arXiv preprint arXiv:1502.03167 (2015). """
        if batch_norm is True:
            batch_norm_out,_,_,mean,var = batch_normalization_train(
                                                  inputs = unpool_out + \
                                                                self.b.dimshuffle('x', 0, 'x', 'x'),
                                                  gamma = self.gamma,
                                                  beta = self.beta,
                                                  axes ='spatial',
                                                  running_mean = self.running_mean,
                                                  running_var = self.running_var )

            mean = theano.tensor.unbroadcast(mean, 0)
            var = theano.tensor.unbroadcast(var, 0)
            var = var + 0.000001
            self.updates[self.running_mean] = mean
            self.updates[self.running_var] = var

            batch_norm_inference = batch_normalization_test (
                                                    inputs = unpool_out + \
                                                            self.b.dimshuffle('x', 0, 'x', 'x'),
                                                    gamma = self.gamma,
                                                    beta = self.beta,
                                                    axes = 'spatial',
                                                    mean = self.running_mean,
                                                    var = self.running_var )
        else:
            batch_norm_out = unpool_out + self.b.dimshuffle('x', 0, 'x', 'x')
            batch_norm_inference = batch_norm_out

        batch_norm_out_shp = unpool_out_shp
        if type(activation) is tuple:
            if activation[0] == 'maxout':
                raise Exception(
                    'Deconvolution layer does not support maxout activation')
        self.output, self.output_shape = _activate(
            x=batch_norm_out,
            activation=activation,
            input_size=batch_norm_out_shp,
            verbose=verbose,
            dimension=2)

        self.inference, _ = _activate(x=batch_norm_inference,
                                      activation=activation,
                                      input_size=batch_norm_out_shp,
                                      verbose=verbose,
                                      dimension=2)
        # store parameters of this layer and do some book keeping.
        self.params = [self.w, self.b]
        self.active_params = [self.w, self.b]
        if batch_norm is True:
            self.params.append(self.gamma)
            self.params.append(self.beta)
            self.active_params.append(self.gamma)
            self.active_params.append(self.beta)
            self.params.append(self.running_mean)  # inactive params
            self.params.append(self.running_var)  # inactive params

        self.L1 = abs(self.w).sum()
        # if batch_norm is True : self.L1 = self.L1 # + abs(self.gamma).sum()
        self.L2 = (self.w**2).sum()
        # if batch_norm is True: self.L2 = self.L2 # + (self.gamma**2).sum()

        # Just doing this for print_layer method to use.
        self.nkerns = nkerns
        self.filter_shape = filter_shape
        self.poolsize = poolsize
        self.stride = stride
        self.input_shape = input_shape
        self.num_neurons = nkerns
        self.activation = activation
        self.batch_norm = batch_norm
Exemple #15
0
    def __init__(self,
                 input,
                 num_neurons,
                 input_shape,
                 id,
                 rng=None,
                 input_params=None,
                 borrow=True,
                 activation='relu',
                 batch_norm=True,
                 verbose=2):
        super(dot_product_layer, self).__init__(id=id,
                                                type='dot_product',
                                                verbose=verbose)
        if verbose >= 3:
            print "... Creating dot product layer"

        if rng is None:
            rng = numpy.random

        create = False
        if input_params is None:
            create = True
        elif input_params[0] is None:
            create = True
        if create is True:
            w_values = numpy.asarray(
                0.01 * rng.standard_normal(size=(input_shape[1], num_neurons)),
                dtype=theano.config.floatX)
            if activation == 'sigmoid':
                w_values *= 4
            self.w = theano.shared(value=w_values, name='weights')
        else:
            self.w = input_params[0]

        create = False
        if input_params is None:
            create = True
        elif input_params[1] is None:
            create = True
        if create is True:
            b_values = numpy.zeros((num_neurons, ), dtype=theano.config.floatX)
            self.b = theano.shared(value=b_values, name='bias')
        else:
            self.b = input_params[1]

        if batch_norm is True:
            create = False
            if input_params is None:
                create = True
            elif input_params[2] is None:
                create = True
            if create is True:
                gamma_values = numpy.ones((1, num_neurons),
                                          dtype=theano.config.floatX)
                self.gamma = theano.shared(value=gamma_values, name='gamma')
                beta_values = numpy.zeros((1, num_neurons),
                                          dtype=theano.config.floatX)
                self.beta = theano.shared(value=beta_values, name='beta')
                self.running_mean = theano.shared(value=numpy.zeros(
                    (1, num_neurons), dtype=theano.config.floatX),
                                                  name='population_mean',
                                                  borrow=borrow)
                self.running_var = theano.shared(value=numpy.ones(
                    (1, num_neurons), dtype=theano.config.floatX),
                                                 name='population_var',
                                                 borrow=borrow)
            else:
                self.gamma = input_params[2]
                self.beta = input_params[3]
                self.running_mean = input_params[4]
                self.running_var = input_params[5]

        linear_fit = T.dot(input, self.w) + self.b

        if batch_norm is True:
            batch_norm_out, _, _, mean, var = batch_normalization_train(
                inputs=linear_fit,
                gamma=self.gamma,
                beta=self.beta,
                running_mean=self.running_mean,
                running_var=self.running_var)

            mean = theano.tensor.unbroadcast(mean, 0)
            var = theano.tensor.unbroadcast(var, 0)
            self.updates[self.running_mean] = mean
            self.updates[self.running_var] = var + 0.001

            batch_norm_inference = batch_normalization_test(
                inputs=linear_fit,
                gamma=self.gamma,
                beta=self.beta,
                mean=self.running_mean,
                var=self.running_var)
        else:
            batch_norm_out = linear_fit
            batch_norm_inference = batch_norm_out

        batch_norm_shp = (input_shape[0], num_neurons)
        self.output, self.output_shape = _activate(x=batch_norm_out,
                                                   activation=activation,
                                                   input_size=batch_norm_shp,
                                                   verbose=verbose,
                                                   dimension=1)

        self.inference, _ = _activate(x=batch_norm_out,
                                      activation=activation,
                                      input_size=batch_norm_shp,
                                      verbose=verbose,
                                      dimension=1)

        # parameters of the model
        if batch_norm is True:
            self.params = [
                self.w, self.b, self.gamma, self.beta, self.running_mean,
                self.running_var
            ]
            self.active_params = [self.w, self.b, self.gamma, self.beta]
        else:
            self.params = [self.w, self.b]
            self.active_params = [self.w, self.b]

        self.L1 = abs(self.w).sum()
        # if batch_norm is True: self.L1 = self.L1 + abs(self.gamma).sum()
        self.L2 = (self.w**2).sum()
        # if batch_norm is True: self.L2 = self.L2 + (self.gamma**2).sum()
        """
        Ioffe, Sergey, and Christian Szegedy. "Batch normalization: Accelerating deep network
        training by reducing internal covariate shift." arXiv preprint arXiv:1502.03167 (2015). """

        if verbose >= 3:
            print "... Dot Product layer is created with output shape " + str(
                self.output_shape)

        self.num_neurons = num_neurons
        self.activation = activation
        self.batch_norm = batch_norm