def __init__(self, input, num_neurons, input_shape, id, rng=None,
             input_params=None, borrow=True, activation='relu',
             batch_norm=True, verbose=2):
    super(dot_product_layer, self).__init__(id=id, type='dot_product', verbose=verbose)
    if verbose >= 3:
        print "... Creating dot product layer"
    if rng is None:
        rng = numpy.random

    create = False
    if input_params is None:
        create = True
    elif input_params[0] is None:
        create = True
    if create is True:
        w_values = numpy.asarray(
            0.01 * rng.standard_normal(size=(input_shape[1], num_neurons)),
            dtype=theano.config.floatX)
        if activation == 'sigmoid':
            w_values *= 4
        self.w = theano.shared(value=w_values, name='weights')
    else:
        self.w = input_params[0]

    create = False
    if input_params is None:
        create = True
    elif input_params[1] is None:
        create = True
    if create is True:
        b_values = numpy.zeros((num_neurons, ), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, name='bias')
    else:
        self.b = input_params[1]

    if batch_norm is True:
        create = False
        if input_params is None:
            create = True
        elif input_params[2] is None:
            create = True
        if create is True:
            alpha_values = numpy.ones((num_neurons, ), dtype=theano.config.floatX)
            self.alpha = theano.shared(value=alpha_values, name='batchnorm')
        else:
            self.alpha = input_params[2]

    dot_product = T.dot(input, self.w)

    if batch_norm is True:
        std = dot_product.std(0)
        mean = dot_product.mean(0)
        std += 0.001  # fudge factor to avoid a divide-by-zero
        dot_product = dot_product - mean
        dot_product = dot_product * (self.alpha / std)

    dot_product = dot_product + self.b
    dot_product_shp = (input_shape[0], num_neurons)
    self.output, self.output_shape = _activate(x=dot_product,
                                               activation=activation,
                                               input_size=dot_product_shp,
                                               verbose=verbose,
                                               dimension=1)

    # parameters of the model
    if batch_norm is True:
        self.params = [self.w, self.b, self.alpha]
    else:
        self.params = [self.w, self.b]

    self.L1 = abs(self.w).sum()
    if batch_norm is True:
        self.L1 = self.L1 + abs(self.alpha).sum()
    self.L2 = (self.w ** 2).sum()
    if batch_norm is True:
        self.L2 = self.L2 + (self.alpha ** 2).sum()

    """
    Ioffe, Sergey, and Christian Szegedy. "Batch normalization: Accelerating deep network
    training by reducing internal covariate shift." arXiv preprint arXiv:1502.03167 (2015).
    """

    if verbose >= 3:
        print "... Dot Product layer is created with output shape " + str(self.output_shape)

    self.num_neurons = num_neurons
    self.activation = activation
    self.batch_norm = batch_norm
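# --- Usage sketch (not part of the original source) --------------------------
# A minimal, illustrative instantiation of the dot_product_layer defined above,
# assuming the class and its helpers (_activate, etc.) are importable from this
# module. The shapes, the id, and the variable names are made-up example values.
import theano.tensor as T

x = T.matrix('x')                          # symbolic mini-batch of flattened inputs
fc1 = dot_product_layer(input=x,
                        num_neurons=256,
                        input_shape=(500, 784),   # (mini_batch_size, n_in)
                        id='fc1',
                        activation='relu',
                        batch_norm=True)
# fc1.output is the symbolic post-activation tensor, fc1.output_shape == (500, 256),
# and fc1.params == [w, b, alpha] because batch_norm is True.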
def __init__(self,
             input,
             nkerns,
             input_shape,
             id,
             filter_shape=(3, 3),
             poolsize=(2, 2),
             pooltype='max',
             batch_norm=False,
             border_mode='valid',
             stride=(1, 1),
             rng=None,
             borrow=True,
             activation='relu',
             input_params=None,
             verbose=2,
             ):
    super(conv_pool_layer_2d, self).__init__(id=id, type='conv_pool', verbose=verbose)
    if verbose >= 3:
        print "... Creating conv pool layer"
    if rng is None:
        rng = numpy.random

    # To copy weights previously created or some weird initializations.
    if input_params is not None:
        init_w = input_params[0]
        init_b = input_params[1]
        if batch_norm is True:
            init_gamma = input_params[2]
            init_beta = input_params[3]
            init_mean = input_params[4]
            init_var = input_params[5]

    mini_batch_size = input_shape[0]
    channels = input_shape[1]
    width = input_shape[3]
    height = input_shape[2]
    # srng = RandomStreams(rng.randint(1,2147462579))

    # Initialize the parameters of this layer.
    w_shp = (nkerns, channels, filter_shape[0], filter_shape[1])

    if input_params is None:
        # fan_in = filter_shape[0]*filter_shape[1]
        # fan_out = filter_shape[0]*filter_shape[1] / numpy.prod(poolsize)
        # w_bound = numpy.sqrt(6. / (fan_in + fan_out))
        self.w = theano.shared(
            # value=numpy.asarray(rng.uniform(low=-w_bound, high=w_bound, size=w_shp),
            value=numpy.asarray(0.01 * rng.standard_normal(size=w_shp),
                                dtype=theano.config.floatX),
            borrow=borrow,
            name='filterbank')

        self.b = theano.shared(value=numpy.zeros((nkerns, ), dtype=theano.config.floatX),
                               name='bias', borrow=borrow)
        if batch_norm is True:
            self.gamma = theano.shared(value=numpy.ones((nkerns, ),
                                                        dtype=theano.config.floatX),
                                       name='gamma', borrow=borrow)
            self.beta = theano.shared(value=numpy.zeros((nkerns, ),
                                                        dtype=theano.config.floatX),
                                      name='beta', borrow=borrow)
            self.running_mean = theano.shared(value=numpy.zeros((nkerns, ),
                                                                dtype=theano.config.floatX),
                                              name='population_mean', borrow=borrow)
            self.running_var = theano.shared(value=numpy.ones((nkerns, ),
                                                              dtype=theano.config.floatX),
                                             name='population_var', borrow=borrow)
    else:
        self.w = init_w
        self.b = init_b
        if batch_norm is True:
            self.gamma = init_gamma
            self.beta = init_beta
            self.running_mean = init_mean
            self.running_var = init_var

    # Perform the convolution part.
    convolver = convolver_2d(input=input,
                             filters=self.w,
                             subsample=stride,
                             filter_shape=w_shp,
                             image_shape=input_shape,
                             border_mode=border_mode,
                             verbose=verbose)

    conv_out = convolver.out
    conv_out_shp = (mini_batch_size, nkerns, convolver.out_shp[0], convolver.out_shp[1])

    self.conv_out = conv_out
    if not poolsize == (1, 1):
        pooler = pooler_2d(input=conv_out,
                           img_shp=conv_out_shp,
                           mode=pooltype,
                           ds=poolsize,
                           verbose=verbose)
        pool_out = pooler.out
        pool_out_shp = pooler.out_shp
    else:
        pool_out = conv_out
        pool_out_shp = conv_out_shp

    """
    Ioffe, Sergey, and Christian Szegedy. "Batch normalization: Accelerating deep network
    training by reducing internal covariate shift." arXiv preprint arXiv:1502.03167 (2015).
    """
    if batch_norm is True:
        batch_norm_out, _, _, mean, var = batch_normalization_train(
            inputs=pool_out + self.b.dimshuffle('x', 0, 'x', 'x'),
            gamma=self.gamma,
            beta=self.beta,
            axes='spatial',
            running_mean=self.running_mean,
            running_var=self.running_var)

        mean = theano.tensor.unbroadcast(mean, 0)
        var = theano.tensor.unbroadcast(var, 0)
        self.updates[self.running_mean] = mean
        self.updates[self.running_var] = var + 0.001

        batch_norm_inference = batch_normalization_test(
            inputs=pool_out + self.b.dimshuffle('x', 0, 'x', 'x'),
            gamma=self.gamma,
            beta=self.beta,
            axes='spatial',
            mean=self.running_mean,
            var=self.running_var)
    else:
        batch_norm_out = pool_out + self.b.dimshuffle('x', 0, 'x', 'x')
        batch_norm_inference = batch_norm_out

    batch_norm_out_shp = pool_out_shp
    self.output, self.output_shape = _activate(x=batch_norm_out,
                                               activation=activation,
                                               input_size=batch_norm_out_shp,
                                               verbose=verbose,
                                               dimension=2)

    self.inference, _ = _activate(x=batch_norm_inference,
                                  activation=activation,
                                  input_size=batch_norm_out_shp,
                                  verbose=verbose,
                                  dimension=2)

    # Store parameters of this layer and do some bookkeeping.
    self.params = [self.w, self.b]
    self.active_params = [self.w, self.b]
    if batch_norm is True:
        self.params.append(self.gamma)
        self.params.append(self.beta)
        self.active_params.append(self.gamma)
        self.active_params.append(self.beta)
        self.params.append(self.running_mean)  # inactive params
        self.params.append(self.running_var)   # inactive params

    self.L1 = abs(self.w).sum()
    # if batch_norm is True: self.L1 = self.L1 + abs(self.gamma).sum()
    self.L2 = (self.w ** 2).sum()
    # if batch_norm is True: self.L2 = self.L2 + (self.gamma**2).sum()

    # Just doing this for the print_layer method to use.
    self.nkerns = nkerns
    self.filter_shape = filter_shape
    self.poolsize = poolsize
    self.stride = stride
    self.input_shape = input_shape
    self.num_neurons = nkerns
    self.activation = activation
    self.batch_norm = batch_norm
def __init__(self, input, input_shape, id, num_classes=10, rng=None,
             input_params=None, borrow=True, activation='softmax', verbose=2):
    super(classifier_layer, self).__init__(id=id, type='classifier', verbose=verbose)

    if rng is None:
        rng = numpy.random
    if verbose >= 3:
        print "... Creating classifier layer"

    self.input = input
    # To copy weights previously created or some weird initializations.
    if input_params is not None:
        self.w = input_params[0]
        self.b = input_params[1]
    else:
        # Initialize the weights as a small random matrix of shape (n_in, n_out)
        # and the bias as zeros.
        self.w = theano.shared(value=numpy.asarray(
            0.01 * rng.standard_normal(size=(input_shape[1], num_classes)),
            dtype=theano.config.floatX),
                               name='weights', borrow=borrow)
        self.b = theano.shared(value=numpy.zeros((num_classes, ),
                                                 dtype=theano.config.floatX),
                               name='bias', borrow=borrow)

    self.fit = T.dot(input, self.w) + self.b
    self.p_y_given_x, softmax_shp = _activate(x=self.fit,
                                              activation=activation,
                                              input_size=num_classes,
                                              verbose=verbose,
                                              dimension=2)

    # Compute the prediction as the class whose probability is maximal, in symbolic form.
    self.predictions = T.argmax(self.p_y_given_x, axis=1)

    # parameters of the model
    self.L1 = abs(self.w).sum()
    self.L2 = (self.w ** 2).sum()
    self.params = [self.w, self.b]
    self.probabilities = T.log(self.p_y_given_x)
    self.output = self.p_y_given_x
    self.output_shape = (input_shape[0], num_classes)
    self.num_neurons = num_classes
    self.activation = activation
    self.dropout_rate = 0
    self.batch_norm = False

    if verbose >= 3:
        print "... Classifier layer is created with output shape " + str(self.output_shape)
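# --- Usage sketch (not part of the original source) --------------------------
# Illustrative expressions built on top of the classifier_layer above: a negative
# log-likelihood cost from ``probabilities`` and an error rate from ``predictions``.
# ``classifier`` and the integer label vector ``y`` are assumed to exist; this is
# a sketch, not the library's own loss API.
import theano.tensor as T

def negative_log_likelihood(classifier, y):
    # ``probabilities`` is log(p_y_given_x); pick the log-probability of the
    # correct class for every sample in the mini-batch and average.
    return -T.mean(classifier.probabilities[T.arange(y.shape[0]), y])

def errors(classifier, y):
    # Mean zero-one loss over the mini-batch.
    return T.mean(T.neq(classifier.predictions, y))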
def __init__(self,
             input,
             nkerns,
             input_shape,
             id,
             output_shape,
             filter_shape=(3, 3),
             poolsize=(1, 1),
             pooltype='max',
             batch_norm=False,
             border_mode='valid',
             stride=(1, 1),
             rng=None,
             borrow=True,
             activation='relu',
             input_params=None,
             verbose=2,
             ):
    super(deconv_layer_2d, self).__init__(id=id, type='deconv', verbose=verbose)
    if verbose >= 3:
        print "... Creating deconv layer"

    if rng is None:
        rng = numpy.random

    create_w = False
    create_b = False
    create_bn = False

    # To copy weights previously created or some weird initializations.
    if input_params is not None:
        if input_params[0] is None:
            create_w = True
        if input_params[1] is None:
            create_b = True
        if batch_norm is True:
            if input_params[2] is None:
                create_bn = True
    else:
        create_w = True
        create_b = True
        create_bn = True

    mini_batch_size = input_shape[0]
    channels = input_shape[1]
    width = input_shape[3]
    height = input_shape[2]
    # srng = RandomStreams(rng.randint(1,2147462579))

    # Initialize the parameters of this layer.
    w_shp = (nkerns, output_shape[2], filter_shape[0], filter_shape[1])
    o_shp = (input_shape[0], output_shape[2], output_shape[0], output_shape[1])

    if create_w is True:
        self.w = theano.shared(value=numpy.asarray(
            0.01 * rng.standard_normal(size=w_shp),
            dtype=theano.config.floatX),
                               borrow=borrow, name='filterbank')
    else:
        self.w = input_params[0]

    if create_b is True:
        self.b = theano.shared(value=numpy.zeros((output_shape[2], ),
                                                 dtype=theano.config.floatX),
                               name='bias', borrow=borrow)
    else:
        self.b = input_params[1]

    if batch_norm is True:
        if create_bn is True:
            self.gamma = theano.shared(value=numpy.ones((output_shape[2], ),
                                                        dtype=theano.config.floatX),
                                       name='gamma', borrow=borrow)
            self.beta = theano.shared(value=numpy.zeros((output_shape[2], ),
                                                        dtype=theano.config.floatX),
                                      name='beta', borrow=borrow)
            self.running_mean = theano.shared(value=numpy.zeros((output_shape[2], ),
                                                                dtype=theano.config.floatX),
                                              name='population_mean', borrow=borrow)
            self.running_var = theano.shared(value=numpy.ones((output_shape[2], ),
                                                              dtype=theano.config.floatX),
                                             name='population_var', borrow=borrow)
        else:
            self.gamma = input_params[2]
            self.beta = input_params[3]
            self.running_mean = input_params[4]
            self.running_var = input_params[5]

    # Perform the (transposed) convolution part.
    convolver = deconvolver_2d(input=input,
                               filters=self.w,
                               output_shape=o_shp,
                               subsample=stride,
                               filter_shape=w_shp,
                               image_shape=input_shape,
                               border_mode=border_mode,
                               verbose=verbose)

    conv_out = convolver.out
    conv_out_shp = o_shp

    self.conv_out = conv_out
    if not poolsize == (1, 1):
        raise Exception(" Unpool operation is not yet supported by the deconv layer")
        """ #pragma: no cover
        pooler = pooler_2d(input=conv_out,
                           img_shp=conv_out_shp,
                           mode=pooltype,
                           ds=poolsize,
                           verbose=verbose)
        pool_out = pooler.out
        pool_out_shp = pooler.out_shp
        """
    else:
        unpool_out = conv_out
        unpool_out_shp = conv_out_shp

    """
    Ioffe, Sergey, and Christian Szegedy. "Batch normalization: Accelerating deep network
    training by reducing internal covariate shift." arXiv preprint arXiv:1502.03167 (2015).
    """
    if batch_norm is True:
        batch_norm_out, _, _, mean, var = batch_normalization_train(
            inputs=unpool_out + self.b.dimshuffle('x', 0, 'x', 'x'),
            gamma=self.gamma,
            beta=self.beta,
            axes='spatial',
            running_mean=self.running_mean,
            running_var=self.running_var)

        mean = theano.tensor.unbroadcast(mean, 0)
        var = theano.tensor.unbroadcast(var, 0)
        var = var + 0.000001
        self.updates[self.running_mean] = mean
        self.updates[self.running_var] = var

        batch_norm_inference = batch_normalization_test(
            inputs=unpool_out + self.b.dimshuffle('x', 0, 'x', 'x'),
            gamma=self.gamma,
            beta=self.beta,
            axes='spatial',
            mean=self.running_mean,
            var=self.running_var)
    else:
        batch_norm_out = unpool_out + self.b.dimshuffle('x', 0, 'x', 'x')
        batch_norm_inference = batch_norm_out

    batch_norm_out_shp = unpool_out_shp

    if type(activation) is tuple:
        if activation[0] == 'maxout':
            raise Exception('Deconvolution layer does not support maxout activation')

    self.output, self.output_shape = _activate(x=batch_norm_out,
                                               activation=activation,
                                               input_size=batch_norm_out_shp,
                                               verbose=verbose,
                                               dimension=2)

    self.inference, _ = _activate(x=batch_norm_inference,
                                  activation=activation,
                                  input_size=batch_norm_out_shp,
                                  verbose=verbose,
                                  dimension=2)

    # Store parameters of this layer and do some bookkeeping.
    self.params = [self.w, self.b]
    self.active_params = [self.w, self.b]
    if batch_norm is True:
        self.params.append(self.gamma)
        self.params.append(self.beta)
        self.active_params.append(self.gamma)
        self.active_params.append(self.beta)
        self.params.append(self.running_mean)  # inactive params
        self.params.append(self.running_var)   # inactive params

    self.L1 = abs(self.w).sum()
    # if batch_norm is True: self.L1 = self.L1 + abs(self.gamma).sum()
    self.L2 = (self.w ** 2).sum()
    # if batch_norm is True: self.L2 = self.L2 + (self.gamma**2).sum()

    # Just doing this for the print_layer method to use.
    self.nkerns = nkerns
    self.filter_shape = filter_shape
    self.poolsize = poolsize
    self.stride = stride
    self.input_shape = input_shape
    self.num_neurons = nkerns
    self.activation = activation
    self.batch_norm = batch_norm
def __init__(self, input, num_neurons, input_shape, id, rng=None,
             input_params=None, borrow=True, activation='relu',
             batch_norm=True, verbose=2):
    super(dot_product_layer, self).__init__(id=id, type='dot_product', verbose=verbose)
    if verbose >= 3:
        print "... Creating dot product layer"
    if rng is None:
        rng = numpy.random

    if input_params is None:
        w_values = numpy.asarray(
            0.01 * rng.standard_normal(size=(input_shape[1], num_neurons)),
            dtype=theano.config.floatX)
        if activation == 'sigmoid':
            w_values *= 4
        self.w = theano.shared(value=w_values, name='w')

        b_values = numpy.zeros((num_neurons,), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, name='b')

        if batch_norm is True:
            alpha_values = numpy.ones((num_neurons,), dtype=theano.config.floatX)
            self.alpha = theano.shared(value=alpha_values, name='alpha')
    else:
        self.w = input_params[0]
        self.b = input_params[1]
        if batch_norm is True:
            self.alpha = input_params[2]

    dot_product = T.dot(input, self.w)

    if batch_norm is True:
        std = dot_product.std(0)
        mean = dot_product.mean(0)
        std += 0.001  # fudge factor to avoid a divide-by-zero
        dot_product = dot_product - mean
        dot_product = dot_product * (self.alpha / std)

    dot_product = dot_product + self.b
    dot_product_shp = (input_shape[0], num_neurons)
    self.output, self.output_shape = _activate(x=dot_product,
                                               activation=activation,
                                               input_size=dot_product_shp,
                                               verbose=verbose,
                                               dimension=1)

    # parameters of the model
    if batch_norm is True:
        self.params = [self.w, self.b, self.alpha]
    else:
        self.params = [self.w, self.b]

    self.L1 = abs(self.w).sum()
    if batch_norm is True:
        self.L1 = self.L1 + abs(self.alpha).sum()
    self.L2 = (self.w ** 2).sum()
    if batch_norm is True:
        self.L2 = self.L2 + (self.alpha ** 2).sum()

    if verbose >= 3:
        print "... Dot Product layer is created with output shape " + str(self.output_shape)

    self.num_neurons = num_neurons
    self.activation = activation
    self.batch_norm = batch_norm
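# --- Worked example (not part of the original source) ------------------------
# A small NumPy check of the simplified batch normalization used above: every
# column (neuron) of the pre-activation is centered by its mini-batch mean,
# divided by its mini-batch std (plus the 0.001 fudge factor), scaled by the
# learned ``alpha`` and shifted by the bias ``b``. Values are arbitrary.
import numpy

pre_act = numpy.random.randn(500, 256)       # stand-in for T.dot(input, w)
alpha = numpy.ones(256)                      # learned scale, initialized to one
b = numpy.zeros(256)                         # bias, initialized to zero

mean = pre_act.mean(axis=0)
std = pre_act.std(axis=0) + 0.001            # fudge factor to avoid divide-by-zero
normed = (pre_act - mean) * (alpha / std) + b   # matches the symbolic graph above

assert abs(normed.mean(axis=0)).max() < 1e-6    # roughly zero mean per neuron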
def __init__(self,
             input,
             nkerns,
             input_shape,
             id,
             filter_shape=(3, 3),
             poolsize=(2, 2),
             pooltype='max',
             batch_norm=False,
             border_mode='valid',
             stride=(1, 1),
             rng=None,
             borrow=True,
             activation='relu',
             input_params=None,
             verbose=2,
             ):
    super(conv_pool_layer_2d, self).__init__(id=id, type='conv_pool', verbose=verbose)
    if verbose >= 3:
        print "... Creating conv pool layer"
    if rng is None:
        rng = numpy.random

    # To copy weights previously created or some weird initializations.
    if input_params is not None:
        init_w = input_params[0]
        init_b = input_params[1]
        if batch_norm is True:
            init_alpha = input_params[2]

    mini_batch_size = input_shape[0]
    channels = input_shape[1]
    width = input_shape[3]
    height = input_shape[2]
    # srng = RandomStreams(rng.randint(1,2147462579))

    # Initialize the parameters of this layer.
    w_shp = (nkerns, channels, filter_shape[0], filter_shape[1])

    if input_params is None:
        # Glorot-style uniform initialization as used in the Theano tutorials:
        # the bound is sqrt(6 / (fan_in + fan_out)), where fan_in is the number of
        # inputs to a hidden unit and fan_out accounts for the pooling region.
        fan_in = filter_shape[0] * filter_shape[1]
        fan_out = filter_shape[0] * filter_shape[1] / numpy.prod(poolsize)
        w_bound = numpy.sqrt(6. / (fan_in + fan_out))
        self.w = theano.shared(value=numpy.asarray(
            rng.uniform(low=-w_bound, high=w_bound, size=w_shp),
            dtype=theano.config.floatX),
                               borrow=borrow, name='filterbank')

        self.b = theano.shared(value=numpy.zeros((w_shp[0]),
                                                 dtype=theano.config.floatX),
                               name='bias', borrow=borrow)
        self.alpha = theano.shared(value=numpy.ones((w_shp[0]),
                                                    dtype=theano.config.floatX),
                                   name='batchnorm', borrow=borrow)
    else:
        self.w = init_w
        self.b = init_b
        if batch_norm is True:
            self.alpha = init_alpha

    # Perform the convolution part.
    convolver = convolver_2d(input=input,
                             filters=self.w,
                             subsample=stride,
                             filter_shape=w_shp,
                             image_shape=input_shape,
                             border_mode=border_mode,
                             verbose=verbose)

    conv_out = convolver.out
    conv_out_shp = (mini_batch_size, nkerns, convolver.out_shp[0], convolver.out_shp[1])

    self.conv_out = conv_out
    if not poolsize == (1, 1):
        pooler = pooler_2d(input=conv_out,
                           img_shp=conv_out_shp,
                           mode=pooltype,
                           ds=poolsize,
                           verbose=verbose)
        pool_out = pooler.out
        pool_out_shp = pooler.out_shp
    else:
        pool_out = conv_out
        pool_out_shp = conv_out_shp

    """
    Ioffe, Sergey, and Christian Szegedy. "Batch normalization: Accelerating deep network
    training by reducing internal covariate shift." arXiv preprint arXiv:1502.03167 (2015).
    """
    if batch_norm is True:
        mean = pool_out.mean((0, 2, 3), keepdims=True)
        std = pool_out.std((0, 2, 3), keepdims=True)
        std += 0.001  # fudge factor to avoid a divide-by-zero

        pool_out = pool_out - mean
        # Use one bias for both batch norm and the regular bias.
        batch_norm_out = pool_out * (self.alpha.dimshuffle('x', 0, 'x', 'x') / std) + \
                         self.b.dimshuffle('x', 0, 'x', 'x')
    else:
        batch_norm_out = pool_out + self.b.dimshuffle('x', 0, 'x', 'x')

    batch_norm_out_shp = pool_out_shp
    self.output, self.output_shape = _activate(x=batch_norm_out,
                                               activation=activation,
                                               input_size=batch_norm_out_shp,
                                               verbose=verbose,
                                               dimension=2)

    # Store parameters of this layer and do some bookkeeping.
    self.params = [self.w, self.b]
    if batch_norm is True:
        self.params.append(self.alpha)

    self.L1 = abs(self.w).sum()
    if batch_norm is True:
        self.L1 = self.L1 + abs(self.alpha).sum()
    self.L2 = (self.w ** 2).sum()
    if batch_norm is True:
        self.L2 = self.L2 + (self.alpha ** 2).sum()

    # Just doing this for the print_layer method to use.
    self.nkerns = nkerns
    self.filter_shape = filter_shape
    self.poolsize = poolsize
    self.stride = stride
    self.input_shape = input_shape
    self.num_neurons = nkerns
    self.activation = activation
    self.batch_norm = batch_norm
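# --- Worked example (not part of the original source) ------------------------
# The weight initialization above draws from U(-w_bound, w_bound) with
# w_bound = sqrt(6 / (fan_in + fan_out)), the Glorot/Xavier-style bound used in
# the Theano tutorials. For a 3x3 filter with 2x2 pooling:
import numpy

filter_shape = (3, 3)
poolsize = (2, 2)
fan_in = filter_shape[0] * filter_shape[1]
fan_out = filter_shape[0] * filter_shape[1] / numpy.prod(poolsize)
w_bound = numpy.sqrt(6. / (fan_in + fan_out))
# With fan_in = 9 and fan_out = 9/4, w_bound comes out to roughly 0.73 (slightly
# larger under Python 2 integer division, where fan_out floors to 2).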
def __init__(self,
             input,
             nkerns,
             input_shape,
             id,
             filter_shape=(3, 3),
             poolsize=(2, 2),
             pooltype='max',
             batch_norm=False,
             border_mode='valid',
             stride=(1, 1),
             rng=None,
             borrow=True,
             activation='relu',
             input_params=None,
             verbose=2,
             ):
    super(conv_pool_layer_2d, self).__init__(id=id, type='conv_pool', verbose=verbose)
    if verbose >= 3:
        print "... Creating conv pool layer"
    if rng is None:
        rng = numpy.random

    # To copy weights previously created or some weird initializations.
    if input_params is not None:
        init_w = input_params[0]
        init_b = input_params[1]
        if batch_norm is True:
            init_alpha = input_params[2]

    mini_batch_size = input_shape[0]
    channels = input_shape[1]
    width = input_shape[3]
    height = input_shape[2]
    # srng = RandomStreams(rng.randint(1,2147462579))

    # Initialize the parameters of this layer.
    w_shp = (nkerns, channels, filter_shape[0], filter_shape[1])

    if input_params is None:
        # Glorot-style uniform initialization as used in the Theano tutorials:
        # the bound is sqrt(6 / (fan_in + fan_out)).
        fan_in = filter_shape[0] * filter_shape[1]
        fan_out = filter_shape[0] * filter_shape[1] / numpy.prod(poolsize)
        w_bound = numpy.sqrt(6. / (fan_in + fan_out))
        self.w = theano.shared(value=numpy.asarray(
            rng.uniform(low=-w_bound, high=w_bound, size=w_shp),
            dtype=theano.config.floatX),
                               borrow=borrow, name='w')
        self.b = theano.shared(value=numpy.zeros((w_shp[0]),
                                                 dtype=theano.config.floatX),
                               name='b', borrow=borrow)
        self.alpha = theano.shared(value=numpy.ones((w_shp[0]),
                                                    dtype=theano.config.floatX),
                                   name='alpha', borrow=borrow)
    else:
        self.w = init_w
        self.b = init_b
        if batch_norm is True:
            self.alpha = init_alpha

    # Perform the convolution part.
    convolver = convolver_2d(input=input,
                             filters=self.w,
                             subsample=stride,
                             filter_shape=w_shp,
                             image_shape=input_shape,
                             border_mode=border_mode,
                             verbose=verbose)

    conv_out = convolver.out
    conv_out_shp = (mini_batch_size, nkerns, convolver.out_shp[0], convolver.out_shp[1])

    self.conv_out = conv_out
    if not poolsize == (1, 1):
        pooler = pooler_2d(input=conv_out,
                           img_shp=conv_out_shp,
                           mode=pooltype,
                           ds=poolsize,
                           verbose=verbose)
        pool_out = pooler.out
        pool_out_shp = pooler.out_shp
    else:
        pool_out = conv_out
        pool_out_shp = conv_out_shp

    if batch_norm is True:
        mean = pool_out.mean((0, 2, 3), keepdims=True)
        std = pool_out.std((0, 2, 3), keepdims=True)
        std += 0.001  # fudge factor to avoid a divide-by-zero

        pool_out = pool_out - mean
        # Use one bias for both batch norm and the regular bias.
        batch_norm_out = pool_out * (self.alpha.dimshuffle('x', 0, 'x', 'x') / std) + \
                         self.b.dimshuffle('x', 0, 'x', 'x')
    else:
        batch_norm_out = pool_out + self.b.dimshuffle('x', 0, 'x', 'x')

    batch_norm_out_shp = pool_out_shp
    self.output, self.output_shape = _activate(x=batch_norm_out,
                                               activation=activation,
                                               input_size=batch_norm_out_shp,
                                               verbose=verbose,
                                               dimension=2)

    # Store parameters of this layer and do some bookkeeping.
    self.params = [self.w, self.b]
    if batch_norm is True:
        self.params.append(self.alpha)

    self.L1 = abs(self.w).sum()
    if batch_norm is True:
        self.L1 = self.L1 + abs(self.alpha).sum()
    self.L2 = (self.w ** 2).sum()
    if batch_norm is True:
        self.L2 = self.L2 + (self.alpha ** 2).sum()

    # Just doing this for the print_layer method to use.
    self.nkerns = nkerns
    self.filter_shape = filter_shape
    self.poolsize = poolsize
    self.stride = stride
    self.input_shape = input_shape
    self.num_neurons = nkerns
    self.activation = activation
    self.batch_norm = batch_norm
def __init__(self, input, num_neurons, input_shape, id, rng=None,
             input_params=None, borrow=True, activation='relu',
             batch_norm=True, verbose=2):
    super(dot_product_layer, self).__init__(id=id, type='dot_product', verbose=verbose)
    if verbose >= 3:
        print "... Creating dot product layer"
    if rng is None:
        rng = numpy.random

    create = False
    if input_params is None:
        create = True
    elif input_params[0] is None:
        create = True
    if create is True:
        w_values = numpy.asarray(
            0.01 * rng.standard_normal(size=(input_shape[1], num_neurons)),
            dtype=theano.config.floatX)
        if activation == 'sigmoid':
            w_values *= 4
        self.w = theano.shared(value=w_values, name='weights')
    else:
        self.w = input_params[0]

    create = False
    if input_params is None:
        create = True
    elif input_params[1] is None:
        create = True
    if create is True:
        b_values = numpy.zeros((num_neurons, ), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, name='bias')
    else:
        self.b = input_params[1]

    if batch_norm is True:
        create = False
        if input_params is None:
            create = True
        elif input_params[2] is None:
            create = True
        if create is True:
            gamma_values = numpy.ones((1, num_neurons), dtype=theano.config.floatX)
            self.gamma = theano.shared(value=gamma_values, name='gamma')
            beta_values = numpy.zeros((1, num_neurons), dtype=theano.config.floatX)
            self.beta = theano.shared(value=beta_values, name='beta')
            self.running_mean = theano.shared(value=numpy.zeros((1, num_neurons),
                                                                dtype=theano.config.floatX),
                                              name='population_mean', borrow=borrow)
            self.running_var = theano.shared(value=numpy.ones((1, num_neurons),
                                                              dtype=theano.config.floatX),
                                             name='population_var', borrow=borrow)
        else:
            self.gamma = input_params[2]
            self.beta = input_params[3]
            self.running_mean = input_params[4]
            self.running_var = input_params[5]

    linear_fit = T.dot(input, self.w) + self.b

    if batch_norm is True:
        batch_norm_out, _, _, mean, var = batch_normalization_train(
            inputs=linear_fit,
            gamma=self.gamma,
            beta=self.beta,
            running_mean=self.running_mean,
            running_var=self.running_var)

        mean = theano.tensor.unbroadcast(mean, 0)
        var = theano.tensor.unbroadcast(var, 0)
        self.updates[self.running_mean] = mean
        self.updates[self.running_var] = var + 0.001

        batch_norm_inference = batch_normalization_test(
            inputs=linear_fit,
            gamma=self.gamma,
            beta=self.beta,
            mean=self.running_mean,
            var=self.running_var)
    else:
        batch_norm_out = linear_fit
        batch_norm_inference = batch_norm_out

    batch_norm_shp = (input_shape[0], num_neurons)
    self.output, self.output_shape = _activate(x=batch_norm_out,
                                               activation=activation,
                                               input_size=batch_norm_shp,
                                               verbose=verbose,
                                               dimension=1)

    # The inference path uses the running-statistics graph, matching the conv layers.
    self.inference, _ = _activate(x=batch_norm_inference,
                                  activation=activation,
                                  input_size=batch_norm_shp,
                                  verbose=verbose,
                                  dimension=1)

    # parameters of the model
    if batch_norm is True:
        self.params = [self.w, self.b, self.gamma, self.beta,
                       self.running_mean, self.running_var]
        self.active_params = [self.w, self.b, self.gamma, self.beta]
    else:
        self.params = [self.w, self.b]
        self.active_params = [self.w, self.b]

    self.L1 = abs(self.w).sum()
    # if batch_norm is True: self.L1 = self.L1 + abs(self.gamma).sum()
    self.L2 = (self.w ** 2).sum()
    # if batch_norm is True: self.L2 = self.L2 + (self.gamma**2).sum()

    """
    Ioffe, Sergey, and Christian Szegedy. "Batch normalization: Accelerating deep network
    training by reducing internal covariate shift." arXiv preprint arXiv:1502.03167 (2015).
    """

    if verbose >= 3:
        print "... Dot Product layer is created with output shape " + str(self.output_shape)

    self.num_neurons = num_neurons
    self.activation = activation
    self.batch_norm = batch_norm
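# --- Usage sketch (not part of the original source) --------------------------
# The constructor above accepts ``input_params`` ordered as
# (w, b, gamma, beta, running_mean, running_var), which is exactly the order of
# ``self.params`` when batch_norm is True. A second layer that shares the first
# layer's parameters could therefore be built as below (illustrative shapes and
# ids; assumes the class is importable).
import theano.tensor as T

x_train = T.matrix('x_train')
x_valid = T.matrix('x_valid')

fc_train = dot_product_layer(input=x_train,
                             num_neurons=256,
                             input_shape=(500, 784),
                             id='fc1',
                             batch_norm=True)

# Same weights, biases and batch-norm statistics, applied to a different input.
fc_valid = dot_product_layer(input=x_valid,
                             num_neurons=256,
                             input_shape=(500, 784),
                             id='fc1-valid',
                             input_params=fc_train.params,
                             batch_norm=True)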