예제 #1
0
def _LL_lower_bound_check(model, x, lnZ, conv_thres=0.0001, max_iter=100000):
    ''' Computes the log likelihood lower bound for x by approximating h1, h2
        by mean field estimates. Every data point is processed on its own.
        .. seealso:: AISTATS 2009: Deep Boltzmann machines
             http://machinelearning.wustl.edu/mlpapers/paper_files/AISTATS09_SalakhutdinovH.pdf

    :Parameters:
        model:       The model
                    -type: Valid DBM model

        x:           Input states.
                    -type: numpy array [batch size, input dim]

        lnZ:         Logarithm of the partition function.
                    -type: float

        conv_thres:  Convergence threshold for the mean field approximation.
                     Iteration stops once the largest absolute change of both
                     hidden layer estimates is not above this value.
                    -type: float

        max_iter:    Maximal number of mean field refinement sweeps per data
                     point if the convergence threshold is not reached.
                    -type: int

    :Returns:
        Log likelihood lower bound for x.
       -type: numpy array [batch size, 1]

    '''
    # Clipping bounds that keep the logarithms in the entropy terms finite
    lower_clip = 0.0000000000000001
    upper_clip = 0.9999999999999999
    num_points = x.shape[0]

    # Pre calc activation from x since it is constant
    id1 = numx.dot(x - model.o1, model.W1)
    d3 = numx.zeros((num_points, model.hidden2_dim))
    d2 = numx.zeros((num_points, model.hidden1_dim))

    for i in range(num_points):
        # Start the second hidden layer at its offset o3. d2_temp is only a
        # scalar placeholder for the first convergence test.
        d3_temp = numx.copy(model.o3)
        d2_temp = 0.0
        d2_new = Sigmoid.f(id1[i, :] +
                           numx.dot(d3_temp - model.o3, model.W2.T) + model.b2)
        d3_new = Sigmoid.f(numx.dot(d2_new - model.o2, model.W2) + model.b3)

        # Run mean field sweeps until both layers changed by at most
        # conv_thres or max_iter sweeps have been performed. The cap keeps a
        # non-converging estimate from looping forever.
        sweeps = 0
        while sweeps < max_iter and (
                numx.max(numx.abs(d2_new - d2_temp)) > conv_thres or
                numx.max(numx.abs(d3_new - d3_temp)) > conv_thres):
            d2_temp = d2_new
            d3_temp = d3_new
            d2_new = Sigmoid.f(id1[i, :] +
                               numx.dot(d3_new - model.o3, model.W2.T) +
                               model.b2)
            d3_new = Sigmoid.f(
                numx.dot(d2_new - model.o2, model.W2) + model.b3)
            sweeps += 1

        d2[i] = numx.clip(d2_new, lower_clip,
                          upper_clip).reshape(1, model.hidden1_dim)
        d3[i] = numx.clip(d3_new, lower_clip,
                          upper_clip).reshape(1, model.hidden2_dim)

    # Negative energy of the states minus lnZ plus the entropy of h1, h2
    # due to the mean field approximation
    neg_energy = -model.energy(x, d2, d3)
    neg_entropy_h1 = numx.sum(
        d2 * numx.log(d2) + (1.0 - d2) * numx.log(1.0 - d2),
        axis=1).reshape(num_points, 1)
    neg_entropy_h2 = numx.sum(
        d3 * numx.log(d3) + (1.0 - d3) * numx.log(1.0 - d3),
        axis=1).reshape(num_points, 1)
    return neg_energy - lnZ - neg_entropy_h1 - neg_entropy_h2
예제 #2
0
    def sample(self, activation):
        ''' Draws noisy states from the activation and limits them to [0, 1].

            Only the entry at index 1 of activation is used. Gaussian noise,
            scaled by the square root of the sigmoid of that entry, is added
            and the result is cut off below 0.0 and above 1.0.

        :Parameters:
            activation: pre and post synaptic activation.
                       -type: list len(2) of numpy arrays [batch_size, input dim]

        :Returns:
            The noisy states limited to the interval [0, 1].
           -type: numpy array [batch_size, input dim]

        '''
        source = activation[1]
        num_rows = source.shape[0]
        num_cols = source.shape[1]

        # Standard normal noise with one value per entry of the source array
        noise = numx.random.randn(num_rows, num_cols)
        noisy = source + noise * numx.sqrt(Sigmoid.f(source))

        # Rectify at zero first, then cap at one
        rectified = numx.maximum(0.0, noisy)
        return numx.minimum(1.0, rectified)
예제 #3
0
    def activation(self,
                   bottom_up_states,
                   top_down_states,
                   bottom_up_pre=None,
                   top_down_pre=None):
        ''' Computes the post and pre synaptic activation of this layer.

            The pre synaptic activation is the sum of the bottom up input
            (only if an input weight layer exists), the top down input (only
            if an output weight layer exists) and the bias of this layer.

        :Parameters:
            bottom_up_states: activation coming from the previous layer.
                             -type: numpy array [batch_size, input dim]

            top_down_states:  activation coming from the next layer.
                             -type: numpy array [batch_size, input dim]

            bottom_up_pre:    pre-activation coming from the previous layer
                              or None. If given, it is used instead of
                              propagating bottom_up_states up again.
                             -type: None or numpy array [batch_size, input dim]

            top_down_pre:     pre-activation coming from the next layer or
                              None. If given, it is used instead of
                              propagating top_down_states down again.
                             -type: None or numpy array [batch_size, input dim]

        :Returns:
            Tuple (Sigmoid.f(pre_act), pre_act), i.e. the post synaptic
            activation followed by the pre synaptic activation.
           -type: tuple of two numpy arrays [batch_size, input dim]

        '''
        total_input = 0.0

        # Contribution of the layer below, reusing the cached value if given
        if self.input_weight_layer is not None:
            total_input += (
                bottom_up_pre if bottom_up_pre is not None else
                self.input_weight_layer.propagate_up(bottom_up_states))

        # Contribution of the layer above, reusing the cached value if given
        if self.output_weight_layer is not None:
            total_input += (
                top_down_pre if top_down_pre is not None else
                self.output_weight_layer.propagate_down(top_down_states))

        total_input += self.bias
        post_act = Sigmoid.f(total_input)
        return post_act, total_input
예제 #4
0
    def train(self,
              data,
              epsilon,
              k=[3, 1],
              offset_typ='DDD',
              meanfield=False):
        ''' Performs one update of the model parameters on the given batch.

            The positive phase estimates the hidden layers d2, d3 from the
            data. The negative phase builds a BinaryBinaryRBM from the current
            parameters, assigns it to self.sampler.model and draws samples
            from self.sampler, which are stored in self.m1, self.m2, self.m3.
            Afterwards offsets, biases and weights of self.model are updated
            in place.

        :Parameters:
            data:       Batch of training data.
                       -type: numpy array [batch size, input dim]

            epsilon:    Learning rates, indexed as
                        [0] W1, [1] W2, [2] b1, [3] b2, [4] b3,
                        [5] o1, [6] o2, [7] o3.
                       -type: sequence with at least 8 numbers

            k:          k[0] is the number of positive phase steps when
                        meanfield is False or True. k[1] is forwarded as
                        second argument to self.sampler.sample for samplers
                        other than RBM_SAMPLER.GibbsSampler. The default list
                        is only read, never modified.
                       -type: sequence of two ints

            offset_typ: One character per layer (o1, o2, o3):
                        'D' uses the data dependent mean, 'M' the mean of the
                        model samples and 'A' the average of both. Any other
                        character moves the offset towards 0.
                       -type: string of length 3

            meanfield:  False: d2, d3 are sampled for k[0] steps.
                        True: k[0] mean field sweeps without sampling.
                        Any other value is used as convergence threshold and
                        mean field sweeps run until the largest change of
                        both layers is not above it (no iteration limit).
                       -type: bool or float

        '''
        # Positive phase
        id1 = numx.dot(data - self.model.o1, self.model.W1)
        d3 = numx.copy(self.model.o3)
        d2 = numx.copy(self.model.o2)
        # The equality tests (instead of identity tests) are kept on purpose
        # so that numeric 0 / 1 still select the same branches as False / True.
        if meanfield == False:
            for _ in range(k[0]):
                d2 = Sigmoid.f(id1 +
                               numx.dot(d3 - self.model.o3, self.model.W2.T) +
                               self.model.b2)
                d2 = self.model.dtype(d2 > numx.random.random(d2.shape))
                d3 = Sigmoid.f(
                    numx.dot(d2 - self.model.o2, self.model.W2) +
                    self.model.b3)
                d3 = self.model.dtype(d3 > numx.random.random(d3.shape))
        elif meanfield == True:
            for _ in range(k[0]):
                d2 = Sigmoid.f(id1 +
                               numx.dot(d3 - self.model.o3, self.model.W2.T) +
                               self.model.b2)
                d3 = Sigmoid.f(
                    numx.dot(d2 - self.model.o2, self.model.W2) +
                    self.model.b3)
        else:
            # meanfield is the convergence threshold
            d2_new = Sigmoid.f(id1 +
                               numx.dot(d3 - self.model.o3, self.model.W2.T) +
                               self.model.b2)
            d3_new = Sigmoid.f(
                numx.dot(d2_new - self.model.o2, self.model.W2) +
                self.model.b3)
            while numx.max(numx.abs(d2_new - d2)) > meanfield or numx.max(
                    numx.abs(d3_new - d3)) > meanfield:
                d2 = d2_new
                d3 = d3_new
                d2_new = Sigmoid.f(
                    id1 +
                    numx.dot(d3_new - self.model.o3, self.model.W2.T) +
                    self.model.b2)
                d3_new = Sigmoid.f(
                    numx.dot(d2_new - self.model.o2, self.model.W2) +
                    self.model.b3)
            d2 = d2_new
            d3 = d3_new

        # Negative phase: the first and third layer are stacked as visible
        # units of an RBM whose hidden units are the second layer.
        self.sampler.model = RBM_MODEL.BinaryBinaryRBM(
            number_visibles=self.model.input_dim + self.model.hidden2_dim,
            number_hiddens=self.model.hidden1_dim,
            data=None,
            initial_weights=numx.vstack((self.model.W1, self.model.W2.T)),
            initial_visible_bias=numx.hstack((self.model.b1, self.model.b3)),
            initial_hidden_bias=self.model.b2,
            initial_visible_offsets=numx.hstack(
                (self.model.o1, self.model.o3)),
            initial_hidden_offsets=self.model.o2)
        if isinstance(self.sampler, RBM_SAMPLER.GibbsSampler):
            sample = self.sampler.sample(numx.hstack((data, d3)))
        else:
            sample = self.sampler.sample(self.batch_size, k[1])
        self.m2 = self.sampler.model.probability_h_given_v(sample)
        self.m1 = sample[:, 0:self.model.input_dim]
        self.m3 = sample[:, self.model.input_dim:]

        # Estimate new means. Strings are compared by value ('=='), identity
        # comparison with a literal is not guaranteed to work.
        new_o1 = 0
        if offset_typ[0] == 'D':
            new_o1 = data.mean(axis=0)
        elif offset_typ[0] == 'A':
            new_o1 = (self.m1.mean(axis=0) + data.mean(axis=0)) / 2.0
        elif offset_typ[0] == 'M':
            new_o1 = self.m1.mean(axis=0)

        new_o2 = 0
        if offset_typ[1] == 'D':
            new_o2 = d2.mean(axis=0)
        elif offset_typ[1] == 'A':
            new_o2 = (self.m2.mean(axis=0) + d2.mean(axis=0)) / 2.0
        elif offset_typ[1] == 'M':
            new_o2 = self.m2.mean(axis=0)

        new_o3 = 0
        if offset_typ[2] == 'D':
            new_o3 = d3.mean(axis=0)
        elif offset_typ[2] == 'A':
            new_o3 = (self.m3.mean(axis=0) + d3.mean(axis=0)) / 2.0
        elif offset_typ[2] == 'M':
            new_o3 = self.m3.mean(axis=0)

        # Reparameterize: every bias compensates the shift of the offsets of
        # its neighbouring layers. b1 and b3 both compensate the shift of o2,
        # which moves with rate epsilon[6] below, so both use epsilon[6].
        self.model.b1 += epsilon[6] * numx.dot(new_o2 - self.model.o2,
                                               self.model.W1.T)
        self.model.b2 += epsilon[5] * numx.dot(
            new_o1 - self.model.o1, self.model.W1) + epsilon[7] * numx.dot(
                new_o3 - self.model.o3, self.model.W2.T)
        self.model.b3 += epsilon[6] * numx.dot(new_o2 - self.model.o2,
                                               self.model.W2)

        # Shift means
        self.model.o1 = (1.0 -
                         epsilon[5]) * self.model.o1 + epsilon[5] * new_o1
        self.model.o2 = (1.0 -
                         epsilon[6]) * self.model.o2 + epsilon[6] * new_o2
        self.model.o3 = (1.0 -
                         epsilon[7]) * self.model.o3 + epsilon[7] * new_o3

        # Calculate gradients (data dependent minus model dependent term)
        dW1 = (numx.dot(
            (data - self.model.o1).T, d2 - self.model.o2) - numx.dot(
                (self.m1 - self.model.o1).T, self.m2 - self.model.o2))
        dW2 = (numx.dot((d2 - self.model.o2).T, d3 - self.model.o3) - numx.dot(
            (self.m2 - self.model.o2).T, self.m3 - self.model.o3))

        db1 = (numx.sum(data - self.m1,
                        axis=0)).reshape(1, self.model.input_dim)
        db2 = (numx.sum(d2 - self.m2, axis=0)).reshape(1,
                                                       self.model.hidden1_dim)
        db3 = (numx.sum(d3 - self.m3, axis=0)).reshape(1,
                                                       self.model.hidden2_dim)

        # Update Model
        self.model.W1 += epsilon[0] / self.batch_size * dW1
        self.model.W2 += epsilon[1] / self.batch_size * dW2

        self.model.b1 += epsilon[2] / self.batch_size * db1
        self.model.b2 += epsilon[3] / self.batch_size * db2
        self.model.b3 += epsilon[4] / self.batch_size * db3
예제 #5
0
    def train(self,
              data,
              epsilon,
              k=[3, 1],
              offset_typ='DDD',
              meanfield=False):
        ''' Performs one update of the model parameters on the given batch.

            The positive phase estimates the hidden layers d2, d3 from the
            data. The negative phase continues the chains stored in self.m1,
            self.m2, self.m3 for k[1] sampling steps, so self.m1 and self.m3
            have to exist before this method is called. Afterwards offsets,
            biases and weights of self.model are updated in place.

        :Parameters:
            data:       Batch of training data.
                       -type: numpy array [batch size, input dim]

            epsilon:    Learning rates, indexed as
                        [0] W1, [1] W2, [2] b1, [3] b2, [4] b3,
                        [5] o1, [6] o2, [7] o3.
                       -type: sequence with at least 8 numbers

            k:          k[0] is the number of positive phase steps when
                        meanfield is False or True, k[1] the number of
                        sampling steps of the negative phase. The default
                        list is only read, never modified.
                       -type: sequence of two ints

            offset_typ: One character per layer (o1, o2, o3):
                        'D' uses the data dependent mean, 'M' the mean of the
                        model samples and 'A' the average of both. Any other
                        character moves the offset towards 0.
                       -type: string of length 3

            meanfield:  False: d3 and d2 are sampled for k[0] steps.
                        True: k[0] mean field sweeps without sampling.
                        Any other value is used as convergence threshold and
                        mean field sweeps run until the largest change of
                        both layers is not above it (no iteration limit).
                       -type: bool or float

        '''
        # Positive phase
        id1 = numx.dot(data - self.model.o1, self.model.W1)
        d3 = numx.copy(self.model.o3)
        # Scalar start value, replaced by an array in the first update
        d2 = 0.0
        # The equality tests (instead of identity tests) are kept on purpose
        # so that numeric 0 / 1 still select the same branches as False / True.
        if meanfield == False:
            for _ in range(k[0]):
                d3 = self.model.dtype(d3 > numx.random.random(d3.shape))
                d2 = Sigmoid.f(id1 +
                               numx.dot(d3 - self.model.o3, self.model.W2.T) +
                               self.model.b2)
                d2 = self.model.dtype(d2 > numx.random.random(d2.shape))
                d3 = Sigmoid.f(
                    numx.dot(d2 - self.model.o2, self.model.W2) +
                    self.model.b3)
        elif meanfield == True:
            for _ in range(k[0]):
                d2 = Sigmoid.f(id1 +
                               numx.dot(d3 - self.model.o3, self.model.W2.T) +
                               self.model.b2)
                d3 = Sigmoid.f(
                    numx.dot(d2 - self.model.o2, self.model.W2) +
                    self.model.b3)
        else:
            # meanfield is the convergence threshold
            d2_new = Sigmoid.f(id1 +
                               numx.dot(d3 - self.model.o3, self.model.W2.T) +
                               self.model.b2)
            d3_new = Sigmoid.f(
                numx.dot(d2_new - self.model.o2, self.model.W2) +
                self.model.b3)
            while numx.max(numx.abs(d2_new - d2)) > meanfield or numx.max(
                    numx.abs(d3_new - d3)) > meanfield:
                d2 = d2_new
                d3 = d3_new
                d2_new = Sigmoid.f(
                    id1 +
                    numx.dot(d3_new - self.model.o3, self.model.W2.T) +
                    self.model.b2)
                d3_new = Sigmoid.f(
                    numx.dot(d2_new - self.model.o2, self.model.W2) +
                    self.model.b3)
            d2 = d2_new
            d3 = d3_new

        # Negative phase: sample the middle layer given both outer layers,
        # then both outer layers given the middle layer.
        for _ in range(k[1]):
            self.m2 = Sigmoid.f(
                numx.dot(self.m1 - self.model.o1, self.model.W1) +
                numx.dot(self.m3 - self.model.o3, self.model.W2.T) +
                self.model.b2)
            self.m2 = self.model.dtype(
                self.m2 > numx.random.random(self.m2.shape))
            self.m1 = Sigmoid.f(
                numx.dot(self.m2 - self.model.o2, self.model.W1.T) +
                self.model.b1)
            self.m1 = self.model.dtype(
                self.m1 > numx.random.random(self.m1.shape))
            self.m3 = Sigmoid.f(
                numx.dot(self.m2 - self.model.o2, self.model.W2) +
                self.model.b3)
            self.m3 = self.model.dtype(
                self.m3 > numx.random.random(self.m3.shape))

        # Estimate new means. Strings are compared by value ('=='), identity
        # comparison with a literal is not guaranteed to work.
        new_o1 = 0
        if offset_typ[0] == 'D':
            new_o1 = data.mean(axis=0)
        elif offset_typ[0] == 'A':
            new_o1 = (self.m1.mean(axis=0) + data.mean(axis=0)) / 2.0
        elif offset_typ[0] == 'M':
            new_o1 = self.m1.mean(axis=0)

        new_o2 = 0
        if offset_typ[1] == 'D':
            new_o2 = d2.mean(axis=0)
        elif offset_typ[1] == 'A':
            new_o2 = (self.m2.mean(axis=0) + d2.mean(axis=0)) / 2.0
        elif offset_typ[1] == 'M':
            new_o2 = self.m2.mean(axis=0)

        new_o3 = 0
        if offset_typ[2] == 'D':
            new_o3 = d3.mean(axis=0)
        elif offset_typ[2] == 'A':
            new_o3 = (self.m3.mean(axis=0) + d3.mean(axis=0)) / 2.0
        elif offset_typ[2] == 'M':
            new_o3 = self.m3.mean(axis=0)

        # Reparameterize: every bias compensates the shift of the offsets of
        # its neighbouring layers, using the learning rate of that offset.
        self.model.b1 += epsilon[6] * numx.dot(new_o2 - self.model.o2,
                                               self.model.W1.T)
        self.model.b2 += epsilon[5] * numx.dot(
            new_o1 - self.model.o1, self.model.W1) + epsilon[7] * numx.dot(
                new_o3 - self.model.o3, self.model.W2.T)
        self.model.b3 += epsilon[6] * numx.dot(new_o2 - self.model.o2,
                                               self.model.W2)

        # Shift means
        self.model.o1 = (1.0 -
                         epsilon[5]) * self.model.o1 + epsilon[5] * new_o1
        self.model.o2 = (1.0 -
                         epsilon[6]) * self.model.o2 + epsilon[6] * new_o2
        self.model.o3 = (1.0 -
                         epsilon[7]) * self.model.o3 + epsilon[7] * new_o3

        # Calculate gradients (data dependent minus model dependent term)
        dW1 = (numx.dot(
            (data - self.model.o1).T, d2 - self.model.o2) - numx.dot(
                (self.m1 - self.model.o1).T, self.m2 - self.model.o2))
        dW2 = (numx.dot((d2 - self.model.o2).T, d3 - self.model.o3) - numx.dot(
            (self.m2 - self.model.o2).T, self.m3 - self.model.o3))

        db1 = (numx.sum(data - self.m1,
                        axis=0)).reshape(1, self.model.input_dim)
        db2 = (numx.sum(d2 - self.m2, axis=0)).reshape(1,
                                                       self.model.hidden1_dim)
        db3 = (numx.sum(d3 - self.m3, axis=0)).reshape(1,
                                                       self.model.hidden2_dim)

        # Update Model
        self.model.W1 += epsilon[0] / self.batch_size * dW1
        self.model.W2 += epsilon[1] / self.batch_size * dW2

        self.model.b1 += epsilon[2] / self.batch_size * db1
        self.model.b2 += epsilon[3] / self.batch_size * db2
        self.model.b3 += epsilon[4] / self.batch_size * db3