def _LL_lower_bound_check(model, x, lnZ, conv_thres=0.0001, max_iter=100000):
    ''' Computes the log likelihood lower bound for x by approximating h1, h2
        by mean field estimates. The mean field fixed point is computed
        separately for every row of x.

        .. seealso:: AISTATS 2009: Deep Boltzmann machines
             http://machinelearning.wustl.edu/mlpapers/paper_files/AISTATS09_SalakhutdinovH.pdf

    :Parameters:
        model:       The model
                    -type: Valid DBM model

        x:           Input states.
                    -type: numpy array [batch size, input dim]

        lnZ:         Logarithm of the partition function.
                    -type: float

        conv_thres:  Convergence threshold for the mean field approximation.
                     Iteration stops once neither layer changed by more than
                     this value (max absolute difference).
                    -type: float

        max_iter:    If the convergence threshold is not reached, maximal
                     number of mean field update steps per data point.
                    -type: int

    :Returns:
        Log likelihood lower bound for x.
       -type: numpy array [batch size, 1]

    '''
    num_points = x.shape[0]

    # Pre calc activation from x since it is constant
    id1 = numx.dot(x - model.o1, model.W1)

    # Mean field estimates of both hidden layers, filled row by row
    d3 = numx.zeros((num_points, model.hidden2_dim))
    d2 = numx.zeros((num_points, model.hidden1_dim))

    # Until convergence or the maximal number of iterations is reached,
    # run the mean field estimation
    for i in range(num_points):
        # The top layer starts at its offset; d2_temp starts at 0.0 so the
        # first convergence check compares the first estimate against zero.
        d3_temp = numx.copy(model.o3)
        d2_temp = 0.0
        d2_new = Sigmoid.f(id1[i, :]
                           + numx.dot(d3_temp - model.o3, model.W2.T)
                           + model.b2)
        d3_new = Sigmoid.f(numx.dot(d2_new - model.o2, model.W2) + model.b3)

        # max_iter bounds the loop so a non-converging sample cannot hang
        # the evaluation (the parameter used to be ignored).
        num_updates = 0
        while num_updates < max_iter and (
                numx.max(numx.abs(d2_new - d2_temp)) > conv_thres
                or numx.max(numx.abs(d3_new - d3_temp)) > conv_thres):
            d2_temp = d2_new
            d3_temp = d3_new
            d2_new = Sigmoid.f(id1[i, :]
                               + numx.dot(d3_new - model.o3, model.W2.T)
                               + model.b2)
            d3_new = Sigmoid.f(numx.dot(d2_new - model.o2, model.W2)
                               + model.b3)
            num_updates += 1

        # Keep the probabilities strictly inside (0, 1) so the logarithms
        # in the entropy term below stay finite.
        d2[i] = numx.clip(d2_new, 0.0000000000000001,
                          0.9999999999999999).reshape(1, model.hidden1_dim)
        d3[i] = numx.clip(d3_new, 0.0000000000000001,
                          0.9999999999999999).reshape(1, model.hidden2_dim)

    # Return energy of states + the entropy of h1, h2 due to the mean field
    # approximation
    neg_energy = -model.energy(x, d2, d3)
    neg_entropy_h1 = numx.sum(
        d2 * numx.log(d2) + (1.0 - d2) * numx.log(1.0 - d2),
        axis=1).reshape(num_points, 1)
    neg_entropy_h2 = numx.sum(
        d3 * numx.log(d3) + (1.0 - d3) * numx.log(1.0 - d3),
        axis=1).reshape(num_points, 1)
    return neg_energy - lnZ - neg_entropy_h1 - neg_entropy_h2
def sample(self, activation):
    ''' Draws noisy states from the activation and limits them to [0, 1].

    Gaussian noise is added to activation[1]; the noise standard deviation
    is the square root of Sigmoid.f(activation[1]).

    :Parameters:
        activation: pre and post synaptic activation; only activation[1]
                    is read here.
                   -type: list len(2) of numpy arrays [batch_size, input dim]

    :Returns:
        Noisy states clamped to the interval [0, 1].
       -type: numpy array [batch_size, input dim]

    '''
    source = activation[1]
    noise = numx.random.randn(source.shape[0], source.shape[1])
    noisy = source + noise * numx.sqrt(Sigmoid.f(source))
    # Rectify at zero first, then cap at one.
    rectified = numx.maximum(0.0, noisy)
    return numx.minimum(1.0, rectified)
def activation(self, bottom_up_states, top_down_states, bottom_up_pre=None,
               top_down_pre=None):
    ''' Calculates the pre and post synaptic activation of this layer.

    :Parameters:
        bottom_up_states: activation coming from the previous layer.
                         -type: numpy array [batch_size, input dim]

        top_down_states:  activation coming from the next layer.
                         -type: numpy array [batch_size, input dim]

        bottom_up_pre:    pre-activation coming from the previous layer or
                          None. If given, it is used instead of propagating
                          bottom_up_states, which avoids re-calculation.
                         -type: None or numpy array [batch_size, input dim]

        top_down_pre:     pre-activation coming from the next layer or
                          None. If given, it is used instead of propagating
                          top_down_states, which avoids re-calculation.
                         -type: None or numpy array [batch_size, input dim]

    :Returns:
        Post synaptic activation (Sigmoid.f of the pre-activation) and the
        pre-activation itself, in that order.
       -type: tuple of two numpy arrays [batch_size, input dim]

    '''
    # Start from a float so the first '+=' creates a new array instead of
    # modifying a caller supplied pre-activation in place.
    total_input = 0.0

    below = self.input_weight_layer
    if below is not None:
        if bottom_up_pre is not None:
            total_input += bottom_up_pre
        else:
            total_input += below.propagate_up(bottom_up_states)

    above = self.output_weight_layer
    if above is not None:
        if top_down_pre is not None:
            total_input += top_down_pre
        else:
            total_input += above.propagate_down(top_down_states)

    total_input += self.bias
    post_synaptic = Sigmoid.f(total_input)
    return post_synaptic, total_input
def test___init__(self):
    """ Checks BipartiteGraph initialization for scalar, array and string
        ('AUTO' / 'INVERSE_SIGMOID') initial values.

        The expected values are rebuilt by reseeding the random generator
        with 42 and drawing random numbers in the same order as the
        constructor call does, so the order of the statements matters.
    """
    sys.stdout.write(
        'BipartiteGraph -> Performing BipartiteGraph initialzation test ...'
    )
    sys.stdout.flush()

    # Check init scalar
    number_visibles = 3
    number_hiddens = 2
    numx.random.seed(42)
    # The keyword arguments are evaluated left to right, i.e. five scalar
    # draws happen before the constructor runs.
    model = BipartiteGraph(number_visibles=number_visibles,
                           number_hiddens=number_hiddens,
                           data=None,
                           initial_weights=numx.random.randn(),
                           initial_visible_bias=numx.random.randn(),
                           initial_hidden_bias=numx.random.randn(),
                           initial_visible_offsets=numx.random.randn(),
                           initial_hidden_offsets=numx.random.randn())
    # Replay the same draws: five scalars first, the weight matrix last.
    numx.random.seed(42)
    initial_weights = numx.random.randn()
    initial_visible_bias = numx.random.randn() * numx.ones(
        (1, number_visibles))
    initial_hidden_bias = numx.random.randn() * numx.ones(
        (1, number_hiddens))
    initial_visible_offsets = numx.random.randn() * numx.ones(
        (1, number_visibles))
    initial_hidden_offsets = numx.random.randn() * numx.ones(
        (1, number_hiddens))
    initial_weights = numx.random.randn(number_visibles,
                                        number_hiddens) * initial_weights
    assert numx.all(model.input_dim == number_visibles)
    assert numx.all(model.output_dim == number_hiddens)
    assert numx.all(model.w == initial_weights)
    assert numx.all(model.bv == initial_visible_bias)
    assert numx.all(model.bh == initial_hidden_bias)
    assert numx.all(model.ov == initial_visible_offsets)
    assert numx.all(model.oh == initial_hidden_offsets)

    # Check init arrays
    numx.random.seed(42)
    initial_weights = numx.random.randn(number_visibles, number_hiddens)
    initial_visible_bias = numx.random.randn(1, number_visibles)
    initial_hidden_bias = numx.random.randn(1, number_hiddens)
    initial_visible_offsets = numx.random.randn(1, number_visibles)
    initial_hidden_offsets = numx.random.randn(1, number_hiddens)
    numx.random.seed(42)
    # Arrays are expected to be taken over as they are.
    model = BipartiteGraph(number_visibles=number_visibles,
                           number_hiddens=number_hiddens,
                           data=None,
                           initial_weights=initial_weights,
                           initial_visible_bias=initial_visible_bias,
                           initial_hidden_bias=initial_hidden_bias,
                           initial_visible_offsets=initial_visible_offsets,
                           initial_hidden_offsets=initial_hidden_offsets)
    numx.random.seed(42)
    assert numx.all(model.w == initial_weights)
    assert numx.all(model.bv == initial_visible_bias)
    assert numx.all(model.bh == initial_hidden_bias)
    assert numx.all(model.ov == initial_visible_offsets)
    assert numx.all(model.oh == initial_hidden_offsets)

    # Check AUTO init without data
    numx.random.seed(42)
    initial_weights = (
        2.0 * numx.random.rand(number_visibles, number_hiddens) - 1.0) * (
        4.0 * numx.sqrt(6.0 / (number_visibles + number_hiddens)))
    # The four string assignments below are not read again in this
    # section; the asserts compare against literal values instead.
    initial_visible_bias = 'AUTO'
    initial_hidden_bias = 'AUTO'
    initial_visible_offsets = 'AUTO'
    initial_hidden_offsets = 'AUTO'
    numx.random.seed(42)
    model = BipartiteGraph(number_visibles=number_visibles,
                           number_hiddens=number_hiddens,
                           data=None,
                           initial_weights='AUTO',
                           initial_visible_bias='AUTO',
                           initial_hidden_bias='AUTO',
                           initial_visible_offsets='AUTO',
                           initial_hidden_offsets='AUTO')
    assert numx.all(model.w == initial_weights)
    assert numx.all(model.bv == 0.0)
    assert numx.all(model.bh == 0.0)
    assert numx.all(model.ov == 0.5)
    assert numx.all(model.oh == 0.5)

    # Check AUTO init with data
    test_data = numx.random.randn(100, number_visibles)
    test_data_mean = test_data.mean(axis=0).reshape(1, test_data.shape[1])
    numx.random.seed(42)
    # All weight combination checked already
    # NOTE(review): the upper clip bound used here is 0.9999; confirm it
    # matches the bound the constructor uses (only relevant for data means
    # above that bound).
    initial_visible_bias = Sigmoid().g(
        numx.clip(test_data_mean, 0.001, 0.9999)).reshape(model.ov.shape)
    initial_hidden_bias = 0.0
    initial_visible_offsets = test_data_mean
    initial_hidden_offsets = 0.5
    numx.random.seed(42)
    model = BipartiteGraph(number_visibles=number_visibles,
                           number_hiddens=number_hiddens,
                           data=test_data,
                           initial_weights='AUTO',
                           initial_visible_bias='AUTO',
                           initial_hidden_bias='AUTO',
                           initial_visible_offsets='AUTO',
                           initial_hidden_offsets='AUTO')
    assert numx.all(model.bv == initial_visible_bias)
    assert numx.all(model.bh == initial_hidden_bias)
    assert numx.all(model.ov == initial_visible_offsets)
    assert numx.all(model.oh == initial_hidden_offsets)

    # Check AUTO init with INVERSE SIGMOID
    test_data = numx.random.randn(100, number_visibles)
    test_data_mean = test_data.mean(axis=0).reshape(1, test_data.shape[1])
    numx.random.seed(42)
    # All weight combination checked already
    # Two scalar draws here mirror the two draws made in the constructor
    # call below after the reseed.
    initial_visible_offsets = numx.random.randn() * numx.ones(
        (1, number_visibles))
    initial_hidden_offsets = numx.random.randn() * numx.ones(
        (1, number_hiddens))
    initial_visible_bias = numx.array(Sigmoid().g(numx.clip( \
        initial_visible_offsets, 0.001, 0.9999)) ).reshape(1, number_visibles)
    initial_hidden_bias = numx.array(Sigmoid().g(numx.clip( \
        initial_hidden_offsets, 0.001, 0.9999)) ).reshape(1, number_hiddens)
    numx.random.seed(42)
    model = BipartiteGraph(
        number_visibles=number_visibles,
        number_hiddens=number_hiddens,
        data=test_data,
        initial_weights='AUTO',
        initial_visible_bias='INVERSE_SIGMOID',
        initial_hidden_bias='INVERSE_SIGMOID',
        initial_visible_offsets=numx.random.randn() * numx.ones(
            (1, number_visibles)),
        initial_hidden_offsets=numx.random.randn() * numx.ones(
            (1, number_hiddens)))
    assert numx.all(model.bv == initial_visible_bias)
    assert numx.all(model.bh == initial_hidden_bias)
    assert numx.all(model.ov == initial_visible_offsets)
    assert numx.all(model.oh == initial_hidden_offsets)

    print(' successfully passed!')
    sys.stdout.flush()
def __init__(self,
             number_visibles,
             number_hiddens,
             data=None,
             visible_activation_function=Sigmoid,
             hidden_activation_function=Sigmoid,
             initial_weights='AUTO',
             initial_visible_bias='AUTO',
             initial_hidden_bias='AUTO',
             initial_visible_offsets='AUTO',
             initial_hidden_offsets='AUTO',
             dtype=numx.float64):
    """ This function initializes all necessary parameters and data structures. It is recommended to pass the
        training data to initialize the network automatically.

    :param number_visibles: Number of the visible variables.
    :type number_visibles: int

    :param number_hiddens: Number of the hidden variables.
    :type number_hiddens: int

    :param data: The training data for parameter initialization if 'AUTO' is chosen for the corresponding
                 parameter. A list of arrays is concatenated.
    :type data: None or numpy array [num samples, input dim] or list of such arrays

    :param visible_activation_function: Activation function for the visible units.
    :type visible_activation_function: pydeep.base.activationFunction

    :param hidden_activation_function: Activation function for the hidden units.
    :type hidden_activation_function: pydeep.base.activationFunction

    :param initial_weights: Initial weights. 'AUTO' draws uniform values out of
                            +-4*sqrt(6/(input dim + output dim)), a scalar is used as standard deviation of
                            Gaussian random values, an array is used as given.
    :type initial_weights: 'AUTO', scalar or numpy array [input dim, output_dim]

    :param initial_visible_bias: Initial visible bias. 'AUTO' is the inverse Sigmoid of the clipped data mean
                                 if data is given and zero otherwise, 'INVERSE_SIGMOID' is the inverse Sigmoid
                                 of the clipped visible offsets. If a scalar is passed all values are
                                 initialized with it.
    :type initial_visible_bias: 'AUTO','INVERSE_SIGMOID', scalar or numpy array [1, input dim]

    :param initial_hidden_bias: Initial hidden bias. 'AUTO' is zero, 'INVERSE_SIGMOID' is the inverse Sigmoid
                                of the clipped hidden offsets. If a scalar is passed all values are
                                initialized with it.
    :type initial_hidden_bias: 'AUTO','INVERSE_SIGMOID', scalar or numpy array [1, output_dim]

    :param initial_visible_offsets: Initial visible offset values. AUTO=data mean or 0.5 if no data is given.
                                    If a scalar is passed all values are initialized with it.
    :type initial_visible_offsets: 'AUTO', scalar or numpy array [1, input dim]

    :param initial_hidden_offsets: Initial hidden offset values. AUTO = 0.5 If a scalar is passed all values
                                   are initialized with it.
    :type initial_hidden_offsets: 'AUTO', scalar or numpy array [1, output_dim]

    :param dtype: Used data type i.e. numpy.float64.
    :type dtype: numpy.float32 or numpy.float64 or numpy.float128
    """

    def _is_option(value, option):
        # Keywords are compared by value, not identity ('is' only works for
        # interned strings). The isinstance guard keeps numpy arrays away
        # from '==', which would compare element-wise.
        return isinstance(value, str) and value == option

    # Set internal datatype
    self.dtype = dtype

    # Set input and output dimension
    self.input_dim = number_visibles
    self.output_dim = number_hiddens

    self.visible_activation_function = visible_activation_function
    self.hidden_activation_function = hidden_activation_function

    # Data statistics default to mean 0.5 and std 1.0 if no data is given
    self._data_mean = 0.5 * numx.ones((1, self.input_dim), self.dtype)
    self._data_std = numx.ones((1, self.input_dim), self.dtype)
    if data is not None:
        if isinstance(data, list):
            data = numx.concatenate(data)
        if self.input_dim != data.shape[1]:
            # 'ex' is the exception alias this block has always used; it is
            # expected to be imported at file level.
            raise ex.ValueError(
                "Data dimension and model input dimension have to be equal!"
            )
        self._data_mean = data.mean(axis=0).reshape(1, data.shape[1])
        self._data_std = data.std(axis=0).reshape(1, data.shape[1])

    # AUTO   -> Small random values out of
    #           +-4*numx.sqrt(6/(self.input_dim+self.output_dim)
    # Scalar -> Small Gaussian distributed random values with std_dev
    #           initial_weights
    # Array  -> The corresponding values are used
    if _is_option(initial_weights, 'AUTO'):
        self.w = numx.array(
            (2.0 * numx.random.rand(self.input_dim, self.output_dim) - 1.0)
            * (4.0 * numx.sqrt(6.0 / (self.input_dim + self.output_dim))),
            dtype=dtype)
    elif numx.isscalar(initial_weights):
        self.w = numx.array(
            numx.random.randn(self.input_dim, self.output_dim)
            * initial_weights, dtype=dtype)
    else:
        self.w = numx.array(initial_weights, dtype=dtype)

    # AUTO   -> data != None -> Initialized to the data mean
    #           data == None -> Initialized to Visible range mean
    # Scalar -> Initialized to given value
    # Array  -> The corresponding values are used
    self.ov = numx.zeros((1, self.input_dim))
    if _is_option(initial_visible_offsets, 'AUTO'):
        if data is not None:
            self.ov += self._data_mean
        else:
            self.ov += 0.5
    elif numx.isscalar(initial_visible_offsets):
        self.ov += initial_visible_offsets
    else:
        self.ov += initial_visible_offsets.reshape(1, self.input_dim)
    self.ov = numx.array(self.ov, dtype=dtype)

    # AUTO            -> data != None -> Initialized to the inverse sigmoid
    #                                    of the clipped data mean
    #                    data == None -> Initialized to zero
    # INVERSE_SIGMOID -> Inverse sigmoid of the clipped visible offsets
    # Scalar          -> Initialized to given value
    # Array           -> The corresponding values are used
    if _is_option(initial_visible_bias, 'AUTO'):
        if data is None:
            # Zero bias is created in the requested dtype as well
            self.bv = numx.zeros((1, self.input_dim), dtype=dtype)
        else:
            self.bv = numx.array(
                Sigmoid.g(numx.clip(self._data_mean, 0.001, 0.999)),
                dtype=dtype).reshape(self.ov.shape)
    elif _is_option(initial_visible_bias, 'INVERSE_SIGMOID'):
        self.bv = numx.array(
            Sigmoid.g(numx.clip(self.ov, 0.001, 0.999)),
            dtype=dtype).reshape(1, self.input_dim)
    elif numx.isscalar(initial_visible_bias):
        self.bv = numx.array(
            initial_visible_bias + numx.zeros((1, self.input_dim)),
            dtype=dtype)
    else:
        self.bv = numx.array(initial_visible_bias, dtype=dtype)

    # AUTO   -> Initialized to Hidden range mean
    # Scalar -> Initialized to given value
    # Array  -> The corresponding values are used
    self.oh = numx.zeros((1, self.output_dim))
    if _is_option(initial_hidden_offsets, 'AUTO'):
        self.oh += 0.5
    elif numx.isscalar(initial_hidden_offsets):
        self.oh += initial_hidden_offsets
    else:
        self.oh += initial_hidden_offsets.reshape(1, self.output_dim)
    self.oh = numx.array(self.oh, dtype=dtype)

    # AUTO            -> Initialized to zero
    # INVERSE_SIGMOID -> Inverse sigmoid of the clipped hidden offsets
    # Scalar          -> Initialized to given value
    # Array           -> The corresponding values are used
    if _is_option(initial_hidden_bias, 'AUTO'):
        self.bh = numx.zeros((1, self.output_dim), dtype=dtype)
    elif _is_option(initial_hidden_bias, 'INVERSE_SIGMOID'):
        self.bh = numx.array(
            Sigmoid.g(numx.clip(self.oh, 0.001, 0.999)),
            dtype=dtype).reshape(self.oh.shape)
    elif numx.isscalar(initial_hidden_bias):
        self.bh = numx.array(
            initial_hidden_bias + numx.zeros((1, self.output_dim)),
            dtype=dtype)
    else:
        self.bh = numx.array(initial_hidden_bias, dtype=dtype)
def _add_hidden_units(self,
                      num_new_hiddens,
                      position=0,
                      initial_weights='AUTO',
                      initial_bias='AUTO',
                      initial_offsets='AUTO'):
    """ This function adds new hidden units at the given position to the model.

    .. Warning:: If the parameters are changed, the trainer needs to be reinitialized.

    :param num_new_hiddens: The number of new hidden units to add.
    :type num_new_hiddens: int

    :param position: Position where the units should be added.
    :type position: int

    :param initial_weights: The initial weight values for the hidden units. 'AUTO' draws uniform values out
                            of +-4*sqrt(6/(input dim + output dim + num_new_hiddens)), a scalar is used as
                            standard deviation of Gaussian random values.
    :type initial_weights: 'AUTO' or scalar or numpy array [input_dim, num_new_hiddens]

    :param initial_bias: The initial hidden bias values. 'AUTO' is zero, 'INVERSE_SIGMOID' is the inverse
                         Sigmoid of the new offsets clipped to [0.01, 0.99].
    :type initial_bias: 'AUTO' or 'INVERSE_SIGMOID' or scalar or numpy array [1, num_new_hiddens]

    :param initial_offsets: The initial hidden mean values. 'AUTO' is 0.5.
    :type initial_offsets: 'AUTO' or scalar or numpy array [1, num_new_hiddens]
    """

    def _is_option(value, option):
        # Keywords are compared by value, not identity ('is' only works for
        # interned strings). The isinstance guard keeps numpy arrays away
        # from '==', which would compare element-wise.
        return isinstance(value, str) and value == option

    # numpy.insert requires integer indices; one (identical) index per new
    # unit inserts the new columns as a block in their given order.
    insert_at = numx.full(num_new_hiddens, position, dtype=int)

    # AUTO   -> Small random values out of
    #           +-4*numx.sqrt(6/(self.input_dim+self.output_dim
    #           +num_new_hiddens)
    # Scalar -> Small Gaussian distributed random values with std_dev
    #           initial_weights
    # Array  -> The corresponding values are used
    if _is_option(initial_weights, 'AUTO'):
        new_weights = (
            (2.0 * numx.random.rand(self.input_dim, num_new_hiddens) - 1.0)
            * (4.0 * numx.sqrt(
                6.0 / (self.input_dim + self.output_dim + num_new_hiddens))))
    elif numx.isscalar(initial_weights):
        new_weights = numx.random.randn(
            self.input_dim, num_new_hiddens) * initial_weights
    else:
        new_weights = initial_weights
    self.w = numx.array(
        numx.insert(self.w, insert_at, new_weights, axis=1), self.dtype)

    # AUTO   -> Initialized to Hidden range mean
    # Scalar -> Initialized to given value
    # Array  -> The corresponding values are used
    if _is_option(initial_offsets, 'AUTO'):
        new_oh = numx.zeros((1, num_new_hiddens)) + 0.5
    elif numx.isscalar(initial_offsets):
        new_oh = numx.zeros((1, num_new_hiddens)) + initial_offsets
    else:
        new_oh = initial_offsets
    self.oh = numx.array(
        numx.insert(self.oh, insert_at, new_oh, axis=1), self.dtype)

    # AUTO            -> Initialized to zero
    # INVERSE_SIGMOID -> Inverse sigmoid of the clipped new offsets
    # Scalar          -> Initialized to given value
    # Array           -> The corresponding values are used
    if _is_option(initial_bias, 'AUTO'):
        new_bias = numx.zeros((1, num_new_hiddens))
    elif _is_option(initial_bias, 'INVERSE_SIGMOID'):
        new_bias = Sigmoid.g(
            numx.clip(new_oh, 0.01, 0.99)).reshape(new_oh.shape)
    elif numx.isscalar(initial_bias):
        new_bias = initial_bias + numx.zeros((1, num_new_hiddens))
    else:
        new_bias = numx.array(initial_bias, dtype=self.dtype)
    self.bh = numx.array(
        numx.insert(self.bh, insert_at, new_bias, axis=1), self.dtype)

    self.output_dim = self.w.shape[1]
# Build the binary DBM; every layer offset is of the data dependent type
offset_typ = 'DDD'
dbm = MODEL.BinaryBinaryDBM(N, M, O, offset_typ, train_set)

# Fixed seed so that all algorithms start from the same random values
numx.random.seed(42)

# Parameters: small Gaussian weights (W1 is drawn before W2), the visible
# offset is the data mean and both hidden offsets are 0.5
dbm.W1 = 0.01 * numx.random.randn(N, M)
dbm.W2 = 0.01 * numx.random.randn(M, O)

dbm.o1 = numx.mean(train_set, axis=0).reshape(1, N)
dbm.o2 = 0.5 + numx.zeros((1, M))
dbm.o3 = 0.5 + numx.zeros((1, O))

# Biases: Sigmoid.g of the offsets, clipped away from 0 and 1
dbm.b1 = Sigmoid.g(numx.clip(dbm.o1, 0.001, 0.999))
dbm.b2 = Sigmoid.g(numx.clip(dbm.o2, 0.001, 0.999))
dbm.b3 = Sigmoid.g(numx.clip(dbm.o3, 0.001, 0.999))

# Negative Markov chain: every chain starts at the layer offsets
dbm.m1 = numx.zeros((batch_size, N)) + dbm.o1
dbm.m2 = numx.zeros((batch_size, M)) + dbm.o2
dbm.m3 = numx.zeros((batch_size, O)) + dbm.o3

# Choose trainer CD, PCD, PT
trainer = TRAINER.PCD(dbm, batch_size)

# AIS betas / inverse temperatures: 100 values in [0.0, 0.5) without the
# end point, followed by the grid from 0.5 to 0.9
a = numx.linspace(0.0, 0.5, 101)[:-1]
b = numx.linspace(0.5, 0.9, 801)
def __init__(self,
             number_visibles,
             number_hiddens,
             data=None,
             visible_activation_function=Sigmoid,
             hidden_activation_function=Sigmoid,
             initial_weights='AUTO',
             initial_visible_bias='AUTO',
             initial_hidden_bias='AUTO',
             initial_visible_offsets='AUTO',
             initial_hidden_offsets='AUTO',
             dtype=numx.float64):
    ''' This function initializes all necessary parameters and data
        structures. It is recommended to pass the training data to
        initialize the network automatically.

    :Parameters:
        number_visibles:              Number of the visible variables.
                                     -type: int

        number_hiddens                Number of hidden variables.
                                     -type: int

        data:                         The training data for parameter
                                      initialization if 'AUTO' is chosen
                                      for the corresponding parameter.
                                      A list of arrays is concatenated.
                                     -type: None or
                                            numpy array [num samples, input dim]

        visible_activation_function:  Activation function for the visible units.
                                     -type: ActivationFunction

        hidden_activation_function:   Activation function for the hidden units.
                                     -type: ActivationFunction

        initial_weights:              Initial weights. 'AUTO' draws uniform
                                      values out of
                                      +-4*sqrt(6/(input dim + output dim)),
                                      a scalar is used as standard deviation
                                      of Gaussian random values.
                                     -type: 'AUTO', scalar or
                                            numpy array [input dim, output_dim]

        initial_visible_bias:         Initial visible bias.
                                      'AUTO' is the inverse Sigmoid of the
                                      clipped data mean if data is given and
                                      zero otherwise, 'INVERSE_SIGMOID' is
                                      the inverse Sigmoid of the clipped
                                      visible offsets. If a scalar is passed
                                      all values are initialized with it.
                                     -type: 'AUTO','INVERSE_SIGMOID', scalar or
                                            numpy array [1, input dim]

        initial_hidden_bias:          Initial hidden bias.
                                      'AUTO' is zero, 'INVERSE_SIGMOID' is
                                      the inverse Sigmoid of the clipped
                                      hidden offsets. If a scalar is passed
                                      all values are initialized with it.
                                     -type: 'AUTO','INVERSE_SIGMOID', scalar or
                                            numpy array [1, output_dim]

        initial_visible_offsets:      Initial visible offset values.
                                      AUTO=data mean or 0.5 if no data is
                                      given. If a scalar is passed all values
                                      are initialized with it.
                                     -type: 'AUTO', scalar or
                                            numpy array [1, input dim]

        initial_hidden_offsets:       Initial hidden offset values.
                                      AUTO = 0.5
                                      If a scalar is passed all values are
                                      initialized with it.
                                     -type: 'AUTO', scalar or
                                            numpy array [1, output_dim]

        dtype:                        Used data type i.e. numpy.float64
                                     -type: numpy.float32 or numpy.float64 or
                                            numpy.float128

    '''

    def _matches(value, keyword):
        # Keywords are compared by value, not identity ('is' only works for
        # interned strings). The isinstance guard keeps numpy arrays away
        # from '==', which would compare element-wise.
        return isinstance(value, str) and value == keyword

    # Set internal datatype
    self.dtype = dtype

    # Set input and output dimension
    self.input_dim = number_visibles
    self.output_dim = number_hiddens

    self.visible_activation_function = visible_activation_function
    self.hidden_activation_function = hidden_activation_function

    # Data statistics default to mean 0.5 and std 1.0 if no data is given
    self._data_mean = 0.5 * numx.ones((1, self.input_dim), self.dtype)
    self._data_std = numx.ones((1, self.input_dim), self.dtype)
    if data is not None:
        if isinstance(data, list):
            data = numx.concatenate(data)
        if self.input_dim != data.shape[1]:
            # The message used to contain a run of indentation blanks from a
            # backslash continuation inside the string literal.
            # 'ex' is the exception alias this block has always used; it is
            # expected to be imported at file level.
            raise ex.ValueError("Data dimension and model input "
                                "dimension have to be equal!")
        self._data_mean = data.mean(axis=0).reshape(1, data.shape[1])
        self._data_std = data.std(axis=0).reshape(1, data.shape[1])

    # AUTO   -> Small random values out of
    #           +-4*numx.sqrt(6/(self.input_dim+self.output_dim)
    # Scalar -> Small Gaussian distributed random values with std_dev
    #           initial_weights
    # Array  -> The corresponding values are used
    if _matches(initial_weights, 'AUTO'):
        self.w = numx.array(
            (2.0 * numx.random.rand(self.input_dim, self.output_dim) - 1.0)
            * (4.0 * numx.sqrt(6.0 / (self.input_dim + self.output_dim))),
            dtype=dtype)
    elif numx.isscalar(initial_weights):
        self.w = numx.array(
            numx.random.randn(self.input_dim, self.output_dim)
            * initial_weights, dtype=dtype)
    else:
        self.w = numx.array(initial_weights, dtype=dtype)

    # AUTO   -> data != None -> Initialized to the data mean
    #           data == None -> Initialized to Visible range mean
    # Scalar -> Initialized to given value
    # Array  -> The corresponding values are used
    self.ov = numx.zeros((1, self.input_dim))
    if _matches(initial_visible_offsets, 'AUTO'):
        if data is not None:
            self.ov += self._data_mean
        else:
            self.ov += 0.5
    elif numx.isscalar(initial_visible_offsets):
        self.ov += initial_visible_offsets
    else:
        self.ov += initial_visible_offsets.reshape(1, self.input_dim)
    self.ov = numx.array(self.ov, dtype=dtype)

    # AUTO            -> data != None -> Initialized to the inverse sigmoid
    #                                    of the clipped data mean
    #                    data == None -> Initialized to zero
    # INVERSE_SIGMOID -> Inverse sigmoid of the clipped visible offsets
    # Scalar          -> Initialized to given value
    # Array           -> The corresponding values are used
    if _matches(initial_visible_bias, 'AUTO'):
        if data is None:
            # Zero bias is created in the requested dtype as well
            self.bv = numx.zeros((1, self.input_dim), dtype=dtype)
        else:
            self.bv = numx.array(
                Sigmoid.g(numx.clip(self._data_mean, 0.001, 0.999)),
                dtype=dtype).reshape(self.ov.shape)
    elif _matches(initial_visible_bias, 'INVERSE_SIGMOID'):
        self.bv = numx.array(
            Sigmoid.g(numx.clip(self.ov, 0.001, 0.999)),
            dtype=dtype).reshape(1, self.input_dim)
    elif numx.isscalar(initial_visible_bias):
        self.bv = numx.array(
            initial_visible_bias + numx.zeros((1, self.input_dim)),
            dtype=dtype)
    else:
        self.bv = numx.array(initial_visible_bias, dtype=dtype)

    # AUTO   -> Initialized to Hidden range mean
    # Scalar -> Initialized to given value
    # Array  -> The corresponding values are used
    self.oh = numx.zeros((1, self.output_dim))
    if _matches(initial_hidden_offsets, 'AUTO'):
        self.oh += 0.5
    elif numx.isscalar(initial_hidden_offsets):
        self.oh += initial_hidden_offsets
    else:
        self.oh += initial_hidden_offsets.reshape(1, self.output_dim)
    self.oh = numx.array(self.oh, dtype=dtype)

    # AUTO            -> Initialized to zero
    # INVERSE_SIGMOID -> Inverse sigmoid of the clipped hidden offsets
    # Scalar          -> Initialized to given value
    # Array           -> The corresponding values are used
    if _matches(initial_hidden_bias, 'AUTO'):
        self.bh = numx.zeros((1, self.output_dim), dtype=dtype)
    elif _matches(initial_hidden_bias, 'INVERSE_SIGMOID'):
        self.bh = numx.array(
            Sigmoid.g(numx.clip(self.oh, 0.001, 0.999)),
            dtype=dtype).reshape(self.oh.shape)
    elif numx.isscalar(initial_hidden_bias):
        self.bh = numx.array(
            initial_hidden_bias + numx.zeros((1, self.output_dim)),
            dtype=dtype)
    else:
        self.bh = numx.array(initial_hidden_bias, dtype=dtype)
def _add_hidden_units(self,
                      num_new_hiddens,
                      position=0,
                      initial_weights='AUTO',
                      initial_bias='AUTO',
                      initial_offsets='AUTO'):
    ''' This function adds new hidden units at the given position to the
        model.
        Warning: If the parameters are changed, the trainer needs to be
                 reinitialized.

    :Parameters:
        num_new_hiddens: The number of new hidden units to add.
                        -type: int

        position:        Position where the units should be added.
                        -type: int

        initial_weights: The initial weight values for the hidden units.
                         'AUTO' draws uniform values out of
                         +-4*sqrt(6/(input dim + output dim
                         + num_new_hiddens)), a scalar is used as standard
                         deviation of Gaussian random values.
                        -type: 'AUTO' or scalar or
                               numpy array [input_dim, num_new_hiddens]

        initial_bias:    The initial hidden bias values. 'AUTO' is zero,
                         'INVERSE_SIGMOID' is the inverse Sigmoid of the new
                         offsets clipped to [0.01, 0.99].
                        -type: 'AUTO' or 'INVERSE_SIGMOID' or scalar or
                               numpy array [1, num_new_hiddens]

        initial_offsets: The initial hidden mean values. 'AUTO' is 0.5.
                        -type: 'AUTO' or scalar or
                               numpy array [1, num_new_hiddens]

    '''

    def _matches(value, keyword):
        # A plain '==' against an array argument compares element-wise and
        # has no single truth value, so only real strings are compared.
        return isinstance(value, str) and value == keyword

    # numpy.insert requires integer indices; one (identical) index per new
    # unit inserts the new columns as a block in their given order.
    column_index = numx.full(num_new_hiddens, position, dtype=int)

    # AUTO   -> Small random values out of
    #           +-4*numx.sqrt(6/(self.input_dim+self.output_dim
    #           +num_new_hiddens)
    # Scalar -> Small Gaussian distributed random values with std_dev
    #           initial_weights
    # Array  -> The corresponding values are used
    if _matches(initial_weights, 'AUTO'):
        new_weights = (
            (2.0 * numx.random.rand(self.input_dim, num_new_hiddens) - 1.0)
            * (4.0 * numx.sqrt(
                6.0 / (self.input_dim + self.output_dim + num_new_hiddens))))
    elif numx.isscalar(initial_weights):
        new_weights = numx.random.randn(
            self.input_dim, num_new_hiddens) * initial_weights
    else:
        new_weights = initial_weights
    self.w = numx.array(
        numx.insert(self.w, column_index, new_weights, axis=1), self.dtype)

    # AUTO   -> Initialized to Hidden range mean
    # Scalar -> Initialized to given value
    # Array  -> The corresponding values are used
    if _matches(initial_offsets, 'AUTO'):
        new_oh = numx.zeros((1, num_new_hiddens)) + 0.5
    elif numx.isscalar(initial_offsets):
        new_oh = numx.zeros((1, num_new_hiddens)) + initial_offsets
    else:
        new_oh = initial_offsets
    self.oh = numx.array(
        numx.insert(self.oh, column_index, new_oh, axis=1), self.dtype)

    # AUTO            -> Initialized to zero
    # INVERSE_SIGMOID -> Inverse sigmoid of the clipped new offsets
    # Scalar          -> Initialized to given value
    # Array           -> The corresponding values are used
    if _matches(initial_bias, 'AUTO'):
        new_bias = numx.zeros((1, num_new_hiddens))
    elif _matches(initial_bias, 'INVERSE_SIGMOID'):
        new_bias = Sigmoid.g(
            numx.clip(new_oh, 0.01, 0.99)).reshape(new_oh.shape)
    elif numx.isscalar(initial_bias):
        new_bias = initial_bias + numx.zeros((1, num_new_hiddens))
    else:
        new_bias = numx.array(initial_bias, dtype=self.dtype)
    self.bh = numx.array(
        numx.insert(self.bh, column_index, new_bias, axis=1), self.dtype)

    self.output_dim = self.w.shape[1]
def __init__(self,
             input_weight_layer,
             output_weight_layer,
             data=None,
             initial_bias='AUTO',
             initial_offsets='AUTO',
             dtype=numx.float64):
    ''' This function initializes the unit layer.

    :Parameters:
        input_weight_layer:  Reference to the input weights.
                            -type: Weight_layer or None

        output_weight_layer  Reference to the output weights.
                            -type: Weight_layer or None

        data:                The training data for initializing the
                             visible bias.
                            -type: None or
                                   numpy array [num samples, input dim]
                                   or List of numpy arrays
                                   [num samples, input dim]

        initial_bias:        Initial visible bias. 'AUTO' is the inverse
                             Sigmoid of the clipped data mean if data is
                             given and zero otherwise, 'INVERSE_SIGMOID' is
                             the inverse Sigmoid of the clipped offsets.
                            -type: 'AUTO', 'INVERSE_SIGMOID', scalar or
                                   numpy array [1, input dim]

        initial_offsets:     Initial visible mean values. 'AUTO' is the
                             data mean or 0.5 if no data is given.
                            -type: 'AUTO', scalar or
                                   numpy array [1, input dim]

        dtype:               Used data type i.e. numpy.float64
                            -type: numpy.float32 or numpy.float64 or
                                   numpy.float128

    :Raises:
        NotImplementedError if neither weight layer is given, ValueError if
        the data dimension does not match the layer dimension.

    '''

    def _matches(value, keyword):
        # Keywords are compared by value, not identity ('is' only works for
        # interned strings). The isinstance guard keeps numpy arrays away
        # from '==', which would compare element-wise.
        return isinstance(value, str) and value == keyword

    # Set internal datatype
    self.dtype = dtype

    # Set input output dimensions
    self.input_weight_layer = input_weight_layer
    self.output_weight_layer = output_weight_layer

    # The layer size is taken from the input weights if present, otherwise
    # from the output weights. Input and output dimension are set equal
    # here, which does not always have to be the case
    # e.g. SoftmaxUnitLayer.
    if input_weight_layer is not None:
        self.input_dim = input_weight_layer.output_dim
        self.output_dim = self.input_dim
    elif output_weight_layer is not None:
        self.output_dim = output_weight_layer.input_dim
        self.input_dim = self.output_dim
    else:
        raise NotImplementedError(
            "Unit layer needs at least one connected weight layer!")

    data_mean = None
    if data is not None:
        if isinstance(data, list):
            data = numx.concatenate(data)
        if self.input_dim != data.shape[1]:
            # The message used to contain a run of indentation blanks from a
            # backslash continuation inside the string literal.
            raise ValueError("Data dimension and model input "
                             "dimension have to be equal!")
        data_mean = numx.mean(data, axis=0).reshape(1, data.shape[1])

    # AUTO   -> data is not None -> Initialized to the data mean
    #           data is None -> Initialized to Visible range mean
    # Scalar -> Initialized to given value
    # Array  -> The corresponding values are used
    self.offset = numx.zeros((1, self.input_dim))
    if _matches(initial_offsets, 'AUTO'):
        if data is not None:
            self.offset += data_mean
        else:
            self.offset += 0.5
    elif numx.isscalar(initial_offsets):
        self.offset += initial_offsets
    else:
        self.offset += initial_offsets.reshape(1, self.input_dim)
    self.offset = numx.array(self.offset, dtype=dtype)

    # AUTO            -> data is not None -> Initialized to the inverse
    #                                        sigmoid of the clipped data mean
    #                    data is None -> Initialized to zero
    # INVERSE_SIGMOID -> Inverse sigmoid of the clipped offsets
    # Scalar          -> Initialized to given value
    # Array           -> The corresponding values are used
    if _matches(initial_bias, 'AUTO'):
        if data is not None:
            self.bias = numx.array(
                Sigmoid.g(numx.clip(data_mean, 0.001, 0.999)),
                dtype=dtype).reshape(self.offset.shape)
        else:
            # Zero bias is created in the requested dtype as well
            self.bias = numx.zeros((1, self.input_dim), dtype=dtype)
    elif _matches(initial_bias, 'INVERSE_SIGMOID'):
        self.bias = numx.array(
            Sigmoid.g(numx.clip(self.offset, 0.001, 0.999)),
            dtype=dtype).reshape(1, self.input_dim)
    elif numx.isscalar(initial_bias):
        self.bias = numx.array(
            initial_bias + numx.zeros((1, self.input_dim)), dtype=dtype)
    else:
        self.bias = numx.array(initial_bias, dtype=dtype)
def train(self, data, epsilon, k=[3, 1], offset_typ='DDD', meanfield=False):
    ''' Performs one update of the model parameters on a batch of data.
        The negative phase samples are drawn by self.sampler from an RBM
        that joins the visible and the second hidden layer.

    :Parameters:
        data:       Batch of training data.
                   -type: numpy array [batch size, input dim]

        epsilon:    Learning rates, indexed as
                    0: W1, 1: W2, 2: b1, 3: b2, 4: b3,
                    5: o1, 6: o2, 7: o3.
                   -type: sequence of 8 floats

        k:          k[0] is the number of positive phase steps, k[1] is
                    handed to the sampler if it is not a GibbsSampler.
                    The default list is never modified.
                   -type: sequence of 2 ints

        offset_typ: One character per layer selecting the new offset:
                    'D' data mean, 'M' model mean, 'A' average of both,
                    anything else gives 0.
                   -type: string (3 chars)

        meanfield:  False -> k[0] steps of sampling,
                    True  -> k[0] steps of mean field,
                    other -> mean field until the largest change is not
                             above this value (no iteration limit).
                   -type: bool or float

    '''

    def new_offset(kind, data_states, model_states):
        # `==` instead of `is`: comparing strings by identity only works
        # by accident of interning.
        if kind == 'D':
            return data_states.mean(axis=0)
        if kind == 'A':
            return (model_states.mean(axis=0)
                    + data_states.mean(axis=0)) / 2.0
        if kind == 'M':
            return model_states.mean(axis=0)
        return 0

    model = self.model

    # Positive phase; the input contribution to h1 is constant.
    id1 = numx.dot(data - model.o1, model.W1)
    d3 = numx.copy(model.o3)
    d2 = numx.copy(model.o2)
    # `==` is kept on purpose so that 0 / 1 still behave like
    # False / True.
    if meanfield == False:
        for _ in range(k[0]):
            d2 = Sigmoid.f(id1 + numx.dot(d3 - model.o3, model.W2.T)
                           + model.b2)
            d2 = model.dtype(d2 > numx.random.random(d2.shape))
            d3 = Sigmoid.f(numx.dot(d2 - model.o2, model.W2) + model.b3)
            d3 = model.dtype(d3 > numx.random.random(d3.shape))
    elif meanfield == True:
        for _ in range(k[0]):
            d2 = Sigmoid.f(id1 + numx.dot(d3 - model.o3, model.W2.T)
                           + model.b2)
            d3 = Sigmoid.f(numx.dot(d2 - model.o2, model.W2) + model.b3)
    else:
        # meanfield is used as the convergence threshold.
        d2_new = Sigmoid.f(id1 + numx.dot(d3 - model.o3, model.W2.T)
                           + model.b2)
        d3_new = Sigmoid.f(numx.dot(d2_new - model.o2, model.W2)
                           + model.b3)
        while (numx.max(numx.abs(d2_new - d2)) > meanfield
               or numx.max(numx.abs(d3_new - d3)) > meanfield):
            d2 = d2_new
            d3 = d3_new
            d2_new = Sigmoid.f(
                id1 + numx.dot(d3_new - model.o3, model.W2.T) + model.b2)
            d3_new = Sigmoid.f(
                numx.dot(d2_new - model.o2, model.W2) + model.b3)
        d2 = d2_new
        d3 = d3_new

    # Negative phase: v and h2 together form the visible layer of an
    # RBM whose hidden layer is h1.
    self.sampler.model = RBM_MODEL.BinaryBinaryRBM(
        number_visibles=model.input_dim + model.hidden2_dim,
        number_hiddens=model.hidden1_dim,
        data=None,
        initial_weights=numx.vstack((model.W1, model.W2.T)),
        initial_visible_bias=numx.hstack((model.b1, model.b3)),
        initial_hidden_bias=model.b2,
        initial_visible_offsets=numx.hstack((model.o1, model.o3)),
        initial_hidden_offsets=model.o2)
    if isinstance(self.sampler, RBM_SAMPLER.GibbsSampler):
        sample = self.sampler.sample(numx.hstack((data, d3)))
    else:
        sample = self.sampler.sample(self.batch_size, k[1])
    self.m2 = self.sampler.model.probability_h_given_v(sample)
    self.m1 = sample[:, 0:model.input_dim]
    self.m3 = sample[:, model.input_dim:]

    # Estimate new means
    new_o1 = new_offset(offset_typ[0], data, self.m1)
    new_o2 = new_offset(offset_typ[1], d2, self.m2)
    new_o3 = new_offset(offset_typ[2], d3, self.m3)

    # Reparameterize: each bias absorbs the shift of the offsets of the
    # layers it is connected to, scaled by that offset's own rate.
    model.b1 += epsilon[6] * numx.dot(new_o2 - model.o2, model.W1.T)
    model.b2 += (epsilon[5] * numx.dot(new_o1 - model.o1, model.W1)
                 + epsilon[7] * numx.dot(new_o3 - model.o3, model.W2.T))
    # o2 is shifted with epsilon[6] below, so b3 has to use epsilon[6]
    # as well (it previously used epsilon[7]).
    model.b3 += epsilon[6] * numx.dot(new_o2 - model.o2, model.W2)

    # Shift means
    model.o1 = (1.0 - epsilon[5]) * model.o1 + epsilon[5] * new_o1
    model.o2 = (1.0 - epsilon[6]) * model.o2 + epsilon[6] * new_o2
    model.o3 = (1.0 - epsilon[7]) * model.o3 + epsilon[7] * new_o3

    # Calculate gradients (data term minus model term)
    dW1 = (numx.dot((data - model.o1).T, d2 - model.o2)
           - numx.dot((self.m1 - model.o1).T, self.m2 - model.o2))
    dW2 = (numx.dot((d2 - model.o2).T, d3 - model.o3)
           - numx.dot((self.m2 - model.o2).T, self.m3 - model.o3))
    db1 = numx.sum(data - self.m1, axis=0).reshape(1, model.input_dim)
    db2 = numx.sum(d2 - self.m2, axis=0).reshape(1, model.hidden1_dim)
    db3 = numx.sum(d3 - self.m3, axis=0).reshape(1, model.hidden2_dim)

    # Update Model
    model.W1 += epsilon[0] / self.batch_size * dW1
    model.W2 += epsilon[1] / self.batch_size * dW2
    model.b1 += epsilon[2] / self.batch_size * db1
    model.b2 += epsilon[3] / self.batch_size * db2
    model.b3 += epsilon[4] / self.batch_size * db3
def train(self, data, epsilon, k=[3, 1], offset_typ='DDD', meanfield=False):
    ''' Performs one update of the model parameters on a batch of data.
        The negative phase continues the Gibbs chains stored in self.m1
        and self.m3, which have to be set before the first call.

    :Parameters:
        data:       Batch of training data.
                   -type: numpy array [batch size, input dim]

        epsilon:    Learning rates, indexed as
                    0: W1, 1: W2, 2: b1, 3: b2, 4: b3,
                    5: o1, 6: o2, 7: o3.
                   -type: sequence of 8 floats

        k:          k[0] is the number of positive phase steps, k[1] the
                    number of Gibbs steps in the negative phase.
                    The default list is never modified.
                   -type: sequence of 2 ints

        offset_typ: One character per layer selecting the new offset:
                    'D' data mean, 'M' model mean, 'A' average of both,
                    anything else gives 0.
                   -type: string (3 chars)

        meanfield:  False -> k[0] steps of sampling,
                    True  -> k[0] steps of mean field,
                    other -> mean field until the largest change is not
                             above this value (no iteration limit).
                   -type: bool or float

    '''

    def new_offset(kind, data_states, model_states):
        # `==` instead of `is`: comparing strings by identity only works
        # by accident of interning.
        if kind == 'D':
            return data_states.mean(axis=0)
        if kind == 'A':
            return (model_states.mean(axis=0)
                    + data_states.mean(axis=0)) / 2.0
        if kind == 'M':
            return model_states.mean(axis=0)
        return 0

    model = self.model

    # Positive phase; the input contribution to h1 is constant.
    id1 = numx.dot(data - model.o1, model.W1)
    d3 = numx.copy(model.o3)
    d2 = 0.0  # placeholder, overwritten by the first update of h1
    # `==` is kept on purpose so that 0 / 1 still behave like
    # False / True.
    if meanfield == False:
        for _ in range(k[0]):
            d3 = model.dtype(d3 > numx.random.random(d3.shape))
            d2 = Sigmoid.f(id1 + numx.dot(d3 - model.o3, model.W2.T)
                           + model.b2)
            d2 = model.dtype(d2 > numx.random.random(d2.shape))
            d3 = Sigmoid.f(numx.dot(d2 - model.o2, model.W2) + model.b3)
    elif meanfield == True:
        for _ in range(k[0]):
            d2 = Sigmoid.f(id1 + numx.dot(d3 - model.o3, model.W2.T)
                           + model.b2)
            d3 = Sigmoid.f(numx.dot(d2 - model.o2, model.W2) + model.b3)
    else:
        # meanfield is used as the convergence threshold.
        d2_new = Sigmoid.f(id1 + numx.dot(d3 - model.o3, model.W2.T)
                           + model.b2)
        d3_new = Sigmoid.f(numx.dot(d2_new - model.o2, model.W2)
                           + model.b3)
        while (numx.max(numx.abs(d2_new - d2)) > meanfield
               or numx.max(numx.abs(d3_new - d3)) > meanfield):
            d2 = d2_new
            d3 = d3_new
            d2_new = Sigmoid.f(
                id1 + numx.dot(d3_new - model.o3, model.W2.T) + model.b2)
            d3_new = Sigmoid.f(
                numx.dot(d2_new - model.o2, model.W2) + model.b3)
        d2 = d2_new
        d3 = d3_new

    # Negative phase: sample h1 given (v, h2), then v and h2 given h1.
    for _ in range(k[1]):
        self.m2 = Sigmoid.f(
            numx.dot(self.m1 - model.o1, model.W1)
            + numx.dot(self.m3 - model.o3, model.W2.T) + model.b2)
        self.m2 = model.dtype(self.m2 > numx.random.random(self.m2.shape))
        self.m1 = Sigmoid.f(
            numx.dot(self.m2 - model.o2, model.W1.T) + model.b1)
        self.m1 = model.dtype(self.m1 > numx.random.random(self.m1.shape))
        self.m3 = Sigmoid.f(
            numx.dot(self.m2 - model.o2, model.W2) + model.b3)
        self.m3 = model.dtype(self.m3 > numx.random.random(self.m3.shape))

    # Estimate new means
    new_o1 = new_offset(offset_typ[0], data, self.m1)
    new_o2 = new_offset(offset_typ[1], d2, self.m2)
    new_o3 = new_offset(offset_typ[2], d3, self.m3)

    # Reparameterize: each bias absorbs the shift of the offsets of the
    # layers it is connected to, scaled by that offset's own rate.
    model.b1 += epsilon[6] * numx.dot(new_o2 - model.o2, model.W1.T)
    model.b2 += (epsilon[5] * numx.dot(new_o1 - model.o1, model.W1)
                 + epsilon[7] * numx.dot(new_o3 - model.o3, model.W2.T))
    model.b3 += epsilon[6] * numx.dot(new_o2 - model.o2, model.W2)

    # Shift means
    model.o1 = (1.0 - epsilon[5]) * model.o1 + epsilon[5] * new_o1
    model.o2 = (1.0 - epsilon[6]) * model.o2 + epsilon[6] * new_o2
    model.o3 = (1.0 - epsilon[7]) * model.o3 + epsilon[7] * new_o3

    # Calculate gradients (data term minus model term)
    dW1 = (numx.dot((data - model.o1).T, d2 - model.o2)
           - numx.dot((self.m1 - model.o1).T, self.m2 - model.o2))
    dW2 = (numx.dot((d2 - model.o2).T, d3 - model.o3)
           - numx.dot((self.m2 - model.o2).T, self.m3 - model.o3))
    db1 = numx.sum(data - self.m1, axis=0).reshape(1, model.input_dim)
    db2 = numx.sum(d2 - self.m2, axis=0).reshape(1, model.hidden1_dim)
    db3 = numx.sum(d3 - self.m3, axis=0).reshape(1, model.hidden2_dim)

    # Update Model
    model.W1 += epsilon[0] / self.batch_size * dW1
    model.W2 += epsilon[1] / self.batch_size * dW2
    model.b1 += epsilon[2] / self.batch_size * db1
    model.b2 += epsilon[3] / self.batch_size * db2
    model.b3 += epsilon[4] / self.batch_size * db3
def __init__(self,
             input_dim,
             hidden1_dim,
             hidden2_dim,
             offset_typ,
             data,
             dtype=numx.float64):
    ''' Initializes the network

    :Parameters:
        input_dim:   Number of input dimensions.
                    -type: int

        hidden1_dim: Number of hidden dimensions for the first hidden
                     layer.
                    -type: int

        hidden2_dim: Number of hidden dimensions for the second hidden
                     layer.
                    -type: int

        offset_typ:  Types of offset values used for the initialization,
                     one character per layer.
                     'DDD' -> Centering, 'AAA' -> Enhanced gradient,
                     'MMM' -> Model mean centering. A '0' leaves the
                     offset of that layer at zero.
                    -type: string (3 chars)

        data:        Training data used to initialize the visible offset
                     and bias from the data mean.
                    -type: None or numpy array [num samples, input dim]

        dtype:       Data type stored as self.dtype. The arrays created
                     here are not converted to it.
                    -type: numpy.float32 or numpy.float64 or
                           numpy.float128

    '''
    # Set used data type
    self.dtype = dtype

    # Set dimensions
    self.input_dim = input_dim
    self.hidden1_dim = hidden1_dim
    self.hidden2_dim = hidden2_dim

    # Initialize weights with small random values
    self.W1 = numx.random.randn(input_dim, hidden1_dim) * 0.01
    self.W2 = numx.random.randn(hidden1_dim, hidden2_dim) * 0.01

    # Initialize offsets
    self.o1 = numx.zeros((1, input_dim))
    self.o2 = numx.zeros((1, hidden1_dim))
    self.o3 = numx.zeros((1, hidden2_dim))

    # Initialize biases
    self.b1 = numx.zeros((1, input_dim))
    self.b2 = numx.zeros((1, hidden1_dim))
    self.b3 = numx.zeros((1, hidden2_dim))

    # Visible layer. The type is compared with `==`; `is` on a string
    # literal only works by accident of interning.
    # NOTE(review): reconstructed from a source with lost indentation;
    # the else branch is assumed to pair with `data is not None`.
    visible_typ = offset_typ[0]
    if data is not None:
        datamean = numx.mean(data, axis=0).reshape(1, input_dim)
        if visible_typ == '0':
            # No offset, the bias alone models the data mean
            self.b1 = Sigmoid.g(numx.clip(datamean, 0.001, 0.999))
        elif visible_typ == 'D':
            self.o1 = numx.copy(datamean)
            self.b1 = Sigmoid.g(numx.clip(self.o1, 0.001, 0.999))
        elif visible_typ == 'A':
            self.o1 = (datamean + 0.5) / 2.0
            self.b1 = Sigmoid.g(numx.clip(self.o1, 0.001, 0.999))
        elif visible_typ == 'M':
            self.o1 += 0.5
    elif visible_typ != '0':
        self.o1 += 0.5

    # Hidden layers start at 0.5 unless the offset is switched off
    if offset_typ[1] != '0':
        self.o2 += 0.5
    if offset_typ[2] != '0':
        self.o3 += 0.5