Example #1
 def fpropDropout(self, inputBatch, weightsToStopBefore = None ):
     """
     Perform a (possibly partial) forward pass through the
     network. Updates self.state which, on a full forward pass,
     holds the input followed by each hidden layer's activation and
     finally the net input incident on the output layer. For a full
     forward pass, we return the actual output unit activations. In
     a partial forward pass we return the activations of the last
     layer reached.
     """
     inputBatch = inputBatch if isinstance(inputBatch, gnp.garray) else gnp.garray(inputBatch)
     if weightsToStopBefore is None:
         weightsToStopBefore = len(self.weights)
     #self.state holds everything before the output nonlinearity, including the net input to the output units
     sample = (gnp.rand(*inputBatch.shape) > self.dropouts[0])
     self.state = [inputBatch * sample]
     for i in range(min(len(self.weights) - 1, weightsToStopBefore)):
         dropoutMultiplier = 1.0/(1.0-self.dropouts[i])
         curActs = self.hidActFuncts[i].activation(gnp.dot(dropoutMultiplier*self.state[-1], self.weights[i]) + self.biases[i])
         sample = (gnp.rand(*curActs.shape) > self.dropouts[i+1])
         self.state.append(curActs * sample)
     if weightsToStopBefore >= len(self.weights):
         dropoutMultiplier = 1.0/(1.0-self.dropouts[-1])
         self.state.append(gnp.dot(dropoutMultiplier*self.state[-1], self.weights[-1]) + self.biases[-1])
         self.acts = self.outputActFunct.activation(self.state[-1])
         return self.acts
     #we didn't reach the output units
     # To return the first set of hidden activations, we would set
     # weightsToStopBefore to 1.
     return self.state[weightsToStopBefore]
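
The dropout trick above can be illustrated without the class or gnumpy. A minimal NumPy-only sketch (array sizes and drop rate made up) showing that masking with probability p and rescaling by 1/(1 - p) leaves the expected activation unchanged, which is why fpropDropout multiplies the masked state by dropoutMultiplier before each weight matrix:

import numpy as np

rng = np.random.RandomState(0)
p_drop = 0.5                      # illustrative drop probability
acts = rng.rand(4, 8)             # a fake batch of hidden activations

# Average the masked-and-rescaled activations over many independent masks:
# with the 1/(1 - p) multiplier the expectation matches the original activations.
avg = np.zeros_like(acts)
n_trials = 20000
for _ in range(n_trials):
    keep = rng.rand(*acts.shape) > p_drop
    avg += acts * keep / (1.0 - p_drop)
avg /= n_trials

print(np.max(np.abs(avg - acts)))  # small (order 1e-2 for 20000 trials)
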
Example #2
    def R_forward_pass(self, state, R):
        """
Apply the R-operator on RNN. R is an RNN object which represents the
vector we multiply by. Note that it needs to know the RNN's state, so
that it doesn't have to unnecessarily recompute the state.
"""

        V, H, OX = state

        if V[0] is not None:
            V = [None] + V

        assert V[0] is None

        T = len(V)-1
        batch_size = len(V[1])
        R_OX, R_HX = [[None]*(T+1) for _ in range(2)]

        R_H_t = g.tile(R.h_init, (batch_size, 1))
        for t in range(1, T+1):
            R_H_1t = R_H_t

            R_HX[t] = g.dot(R_H_1t, self.W_hh) + g.dot(H[t-1], R.W_hh) + g.dot(V[t], R.W_vh)
            R_H_t = self.hid_nonlin.grad_y(H[t]) * R_HX[t]
            R_OX[t] = g.dot(H[t], R.W_ho) + g.dot(R_H_t, self.W_ho)


        # R_HX is returned as well; it is used for the structural damping ("structured reg") term.
        return (R_HX, R_OX[1:])
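
For intuition, the R-operator is a Jacobian-vector product: the directional derivative of the network's outputs when the parameters move in the direction held in R. A self-contained NumPy sketch on a toy one-layer "network" (everything here is invented for illustration), checked against a finite difference:

import numpy as np

def f(theta, x):
    # toy "network": y = tanh(theta[0] * x + theta[1])
    return np.tanh(theta[0] * x + theta[1])

def R_op(theta, v, x):
    # analytic directional derivative d/d(eps) f(theta + eps * v, x) at eps = 0
    pre = theta[0] * x + theta[1]
    return (1.0 - np.tanh(pre) ** 2) * (v[0] * x + v[1])

theta = np.array([0.7, -0.2])
v = np.array([0.3, 0.5])          # the direction the Jacobian is multiplied by
x = 1.4
eps = 1e-6

fd = (f(theta + eps * v, x) - f(theta - eps * v, x)) / (2 * eps)
print(abs(fd - R_op(theta, v, x)))   # ~1e-10: matches the finite difference
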
Example #3
    def backprop(self):
        self.timer_logger('backprop', time.time())
        self.results['grads'] = []
        self.results['bias_grads'] = []
        if self.problem == 'classification':
            #assumes softmax output + cross-entropy cost; their combined gradient simplifies to: error = y - t
            self.results['error'] = self.results['current'] - gpu.garray(
                self.util.create_t_dataset(self.batch_y))
        else:
            #assumes linear output units + squared-error cost; their combined gradient simplifies to: error = y - t
            self.results['error'] = (self.results['current'] -
                                     gpu.garray(self.batch_y))

        for pair in self.results['activations']:
            activation = pair[0]
            weight = pair[1]

            gradient = self.activation_gradient(activation)
            self.results['grads'].insert(
                0, gpu.dot(activation.T, self.results['error']))
            self.results['bias_grads'].insert(
                0,
                gpu.dot(gpu.ones((1, self.results['error'].shape[0])),
                        self.results['error']))
            self.results['error'] = gpu.dot(self.results['error'],
                                            weight.T) * gradient

        self.timer_logger('backprop', time.time())
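
The error = y - t shortcut in the comments relies on the gradient of cross-entropy through a softmax (and of squared error through a linear unit) collapsing to prediction minus target. A quick NumPy check of the softmax case with toy numbers:

import numpy as np

def softmax(z):
    e = np.exp(z - z.max())
    return e / e.sum()

z = np.array([0.2, -1.3, 0.8])        # arbitrary logits
t = np.array([0.0, 1.0, 0.0])         # one-hot target

def cross_entropy(logits):
    return -np.sum(t * np.log(softmax(logits)))

# Numerical gradient of the loss w.r.t. the logits.
eps = 1e-6
num_grad = np.array([(cross_entropy(z + eps * e) - cross_entropy(z - eps * e)) / (2 * eps)
                     for e in np.eye(3)])

print(np.allclose(num_grad, softmax(z) - t, atol=1e-6))   # True: dL/dlogits = y - t
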
Example #4
    def forward_pass(self, batch, O=None):
        if isinstance(batch, tuple) and len(batch) == 2:
            V, O = batch
            assert len(V) == len(O)
        elif isinstance(batch, tuple) and len(batch) == 3:
            V, O, M = batch
            assert len(V) == len(O) == len(M)
        else:
            V = batch
        if V[0] is not None:
            V = [None] + V

        T = len(V)-1
        batch_size = len(V[1])

        A, B, H, OX = [[None]*(T+1) for _ in range(4)]

        H[0] = g.tile(self.h_init, (batch_size, 1))
        for t in range(1, T+1):
            B[t] = g.dot(V[t],   self.W_vf).tanh() 
            A[t] = g.dot(H[t-1], self.W_hf)
            C_t = g.dot(V[t], self.W_vh) # + hh stuff

            AB = A[t]*(B[t] + self.f_bias)

            HX_t = g.dot(AB, self.W_fh) + C_t
            H[t] = self.hid_nonlin(HX_t)

            OX[t] = g.dot(H[t], self.W_ho)

        return (V[1:], A, B, H, OX[1:])
Example #5
def costfunc_gpu_ReLU(x, *args):
    num_input,num_hidden,num_output,inputs,lambda_val,sparsityParam,beta = args
    num_weights1 = (num_input+1)*num_hidden
    x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    #weights1 = gpu.garray(reshape(x[0:num_weights1],(num_hidden,num_input+1)))
    weights1 = x[0:num_weights1].reshape((num_hidden,num_input+1))
    #weights2 = gpu.garray(reshape(x[num_weights1:shape(x)[0]], (num_output,num_hidden+1)))
    weights2 = x[num_weights1:shape(x)[0]].reshape((num_output,num_hidden+1))
    nData = shape(inputs)[1]
    data = gpu.concatenate((gpu.ones((1,nData)), inputs), axis = 0)
    hidden_sum = gpu.dot(weights1, data)
    hidden_activation = gpu.log(1+hidden_sum.exp())
    p_avg = gpu.sum(hidden_activation,axis=1)/nData
    hidden_activation = gpu.concatenate((gpu.ones((1,nData)), hidden_activation), axis = 0)
    output = gpu.dot(weights2, hidden_activation)
    regularized_penalty1 = weights1[:,1:shape(weights1)[1]]
    regularized_penalty2 = weights2[:,1:shape(weights2)[1]]
    regularized_penalty1 = regularized_penalty1 * regularized_penalty1
    regularized_penalty2 = regularized_penalty2 * regularized_penalty2
    output_target_diff = (output - inputs)*(output - inputs)
    KL = gpu.sum(sparsityParam*gpu.log(sparsityParam/p_avg) + (1-sparsityParam)*gpu.log((1-sparsityParam)/(1-p_avg)))
    cost = gpu.sum(output_target_diff)/(2*nData) + 0.5 * lambda_val * (gpu.sum(regularized_penalty1) + gpu.sum(regularized_penalty2)) + beta*KL
    print 'ReLU Linear Decoder Cost: ', cost
    return cost
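
The beta*KL term in the cost above is the standard sparsity penalty on the average hidden activation p_avg against the target sparsityParam. The same penalty in isolation, as a NumPy sketch with made-up numbers:

import numpy as np

rho = 0.05                                   # sparsityParam: target mean activation
p_avg = np.array([0.04, 0.10, 0.06])         # made-up per-unit mean activations

kl = np.sum(rho * np.log(rho / p_avg)
            + (1 - rho) * np.log((1 - rho) / (1 - p_avg)))
print(kl)   # 0 when every p_avg equals rho, grows as they drift apart
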
def dbn_forward_pass(ws_vh, ws_v, ws_h, x, y=None):
    """
    Deep belief net forward pass.
    
    x: input data (N x D matrix)
    y: Class label (1-of-K coded, N x K matrix). If not None, it is concatenated
        to the input for top layer RBM when calculating the output of the DBN.
    ws_vh: list of layer weights (L x D x H)
    ws_v: list of layer input biases (L x D x 1)
    ws_h: list of layer output biases (L x H x 1)
    Returns activations (continuous) and outputs (0-1, sigmoid(activations)) of
    top layer
    """
    L = len(ws_vh)
    h = x.T

    # forward (bottom-up) pass
    for l in range(L - 1):
        ah = gnp.dot(ws_vh[l].T, h) + ws_h[l]
        h = gnp.logistic(ah)

    # if supervised, concatenate class labels to input to top layer RBM
    if y is not None:
        h = gnp.concatenate((y.T, h))

    ah = gnp.dot(ws_vh[-1].T, h) + ws_h[-1]
    h = gnp.logistic(ah)

    return ah.T, h.T
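
A possible usage sketch for dbn_forward_pass with randomly initialised weights; the layer sizes are invented, and gnumpy is assumed to be importable as gnp, as in the function itself:

import numpy as np
import gnumpy as gnp

D, H1, H2, N = 6, 5, 4, 3                     # layer sizes and batch size (made up)
ws_vh = [gnp.garray(0.1 * np.random.randn(D, H1)),
         gnp.garray(0.1 * np.random.randn(H1, H2))]
ws_v = [gnp.zeros((D, 1)), gnp.zeros((H1, 1))]
ws_h = [gnp.zeros((H1, 1)), gnp.zeros((H2, 1))]

x = gnp.garray(np.random.rand(N, D))          # N x D input, as in the docstring
ah, h = dbn_forward_pass(ws_vh, ws_v, ws_h, x)
print(ah.shape, h.shape)                       # both N x H2
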
Example #7
def exact_fisher_information_biases(rbm, batch_units=10, show_progress=False):
    batch_size = 2 ** batch_units

    nvis, nhid = rbm.nvis, rbm.nhid
    num_params = nvis + nhid

    s = gnp.zeros(num_params)
    G = gnp.zeros((num_params, num_params))

    for hid, p in iter_configurations(rbm, batch_units=batch_units, show_progress=show_progress):
        g = gnp.zeros((batch_size, num_params))
        cond_vis = gnp.logistic(rbm.vis_inputs(hid))

        g[:, :nvis] = cond_vis
        g[:, nvis:] = hid

        s += gnp.dot(p, g)
        G += gnp.dot(g.T * p, g)

        diag_term = gnp.dot(p, g * (1. - g))
        G += np.diag(diag_term.as_numpy_array())

    G -= s[:, nax] * s[nax, :]

    return G
Example #8
    def forward(self, X, test=False):
        """
        Feed-forward pass through the model
        X: ('batchsize' x 'context') matrix of word indices
        """
        batchsize = X.shape[0]
        R = self.R
        C = self.C
        bw = self.bw

        # Obtain word features
        tmp = R.as_numpy_array()[:, X.flatten()].flatten(order='F')
        tmp = tmp.reshape((batchsize, self.K * self.context))
        words = np.zeros((batchsize, self.K, self.context))
        for i in range(batchsize):
            words[i, :, :] = tmp[i, :].reshape((self.K, self.context),
                                               order='F')
        words = gpu.garray(words)

        # Compute the hidden layer (predicted next word representation)
        acts = gpu.zeros((batchsize, self.K))
        for i in range(self.context):
            acts = acts + gpu.dot(words[:, :, i], C[i, :, :])
        acts = gpu.concatenate((acts, gpu.ones((batchsize, 1))), 1)

        # Compute softmax
        preds = gpu.dot(acts, gpu.concatenate((R, bw)))
        preds = gpu.exp(preds - preds.max(1).reshape(batchsize, 1))
        denom = preds.sum(1).reshape(batchsize, 1)
        preds = gpu.concatenate((preds / denom, gpu.ones((batchsize, 1))), 1)

        return (words, acts, preds.as_numpy_array())
Example #9
    def backward(self, Y, preds, acts, words, X):
        """
        Backward pass through the network
        """
        batchsize = preds.shape[0]

        # Compute part of df/dR
        Ix = gpu.garray(preds[:, :-1] - Y) / batchsize
        delta = gpu.dot(acts.T, Ix)
        dR = delta[:-1, :] + self.gamma_r * self.R
        db = delta[-1, :]
        dR = dR.as_numpy_array()

        # Compute df/dC and word inputs for df/dR
        Ix = gpu.dot(Ix, self.R.T)
        dC = gpu.zeros(np.shape(self.C))
        for i in range(self.context):
            delta = gpu.dot(words[:, :, i].T, Ix)
            dC[i, :, :] = delta + self.gamma_c * self.C[i, :, :]
            delta = gpu.dot(Ix, self.C[i, :, :].T)
            delta = delta.as_numpy_array()
            for j in range(X.shape[0]):
                dR[:, X[j, i]] = dR[:, X[j, i]] + delta.T[:, j]

        self.dR = gpu.garray(dR)
        self.db = db
        self.dC = dC
    def backprop(self, X, y_target) :
        # forward
        activity = []
        result = X
        for i in range(len(self.weights)):
            p = self.dropout_probability[i]
            mask = (g.rand(result.shape) >= p)
            result = result * mask
            del mask
            activity.append(result)
            w,b = self.weights[i]
            result = g.dot(result,w) + b
            result = self.activation[i](result)
            
        # backward
        gradientNodes = []
        lastGradient = self.gradient[-1](result, y_target)
        gradientNodes.append(lastGradient)
        for i in reversed(range(1,len(self.weights))):
            w,b = self.weights[i]
            lastGradient = g.dot(lastGradient, w.T) * self.gradient[i-1](activity[i])
            gradientNodes.append(lastGradient)
                
        # get gradient
        resultGradient = []
        for i in range(len(self.weights)):
            gradW = (g.dot(activity[i].T,gradientNodes[-(i+1)]) / len(X))
            assert(gradW.shape == self.weights[i][0].shape)
            gradB = (g.sum(gradientNodes[-(i+1)],axis=0) / len(X))
            assert(gradB.shape == self.weights[i][1].shape)
            resultGradient.append([gradW,gradB])

        del gradientNodes
        
        return resultGradient
    def bprop(self, outputErrSignal, MLerr, fpropState=None):
        """
		Perform a backward pass through the network. fpropState
		defaults to self.state (set during fprop) and outputErrSignal
		should be self.outputActFunct.dErrordNetInput(...).
		"""

        # Manifold learning

        ml_sense = [None for i in range(len(self.weights))]
        pivt_sense = [None for i in range(len(self.weights))]

        ml_sense[-1] = MLerr * self.actsML * (1 - self.actsML)
        pivt_sense[-1] = outputErrSignal - MLerr * self.actsMLpvt * (
            1 - self.actsMLpvt)

        for i in reversed(range(len(self.weights) - 1)):

            ml_sense[i] = gnp.dot(
                ml_sense[i + 1],
                self.weights[i + 1].T) * self.hidActFuncts[i].dEdNetInput(
                    self.stateML[i + 1])
            pivt_sense[i] = gnp.dot(pivt_sense[i + 1], self.weights[
                i + 1].T) * self.hidActFuncts[i].dEdNetInput(self.pivt[i + 1])

        return ml_sense, pivt_sense
Example #12
    def backward(self, dEdY):
        N = dEdY.shape[0]
        S = self.windowSize
        T = dEdY.shape[1] + S - 1
        F = dEdY.shape[2]
        D = self.X.shape[2]
        dEdY = dEdY.reshape(N * (T - S + 1), F)
        dEdX = np.zeros(self.X.shape, self.X.dtype)
        
        if self.gpu:
            gdEdY = gpu.as_garray(dEdY.astype('float32'))
            self.dEdW = gpu.dot(self.Z.transpose(), gdEdY)
        else:
            self.dEdW = np.dot(self.Z.transpose(), dEdY)

        if self.outputdEdX:
            if self.gpu:
                gdEdZ = gpu.dot(gdEdY, self.W.transpose())
                dEdZ = gpu.as_numpy_array(gdEdZ)
            else:
                dEdZ = np.dot(dEdY, self.W.transpose())

            dEdZ = dEdZ.reshape(N, T - S + 1, S, D)
            for t in range(0, T):
                if t <= S - 1:
                    dEdX[:, t, :] = np.sum(dEdZ[:, range(0, t + 1), range(t, -1, -1), :], axis=1)
                elif t >= T - S + 1:
                    dEdX[:, t, :] = np.sum(dEdZ[:, range(t - S + 1, T - S + 1), range(S - 1, S - (T - t) - 1, -1), :], axis=1)
                else:
                    dEdX[:, t, :] = np.sum(dEdZ[:, range(t - S + 1, t + 1), range(S - 1, -1, -1), :], axis=1)
        return dEdX
Example #13
    def backward_pass(self, state, dOX, compute_grad2 = False):
        grad = self.unpack(self.pack() * 0)

        if compute_grad2:
            grad2 = self.unpack(self.pack() * 0)
        else:
            grad2 = None

        dY = dOX
        for i in reversed(range(len(self.sizes) - 1)):
            dX = self.nonlins[i].grad_y(state[i + 1]) * dY

            X = state[i]

            #state[i + 1] = self.hid_nonlin(g.dot(X, self.W[i]) + self.b[i])
            grad.b[i] += dX.sum(0)
            grad.W[i] += g.dot(X.T, dX)

            if compute_grad2:
                grad2.b[i] += (dX*dX).sum(0)
                grad2.W[i] += g.dot((X*X).T, dX*dX)


            ## backprop the gradient:
            if i > 0: # typically the first multiplication is the costliest.
                dY = g.dot(dX, self.W[i].T)

        return grad, grad2
def rbm_sample(w_vh, w_v, w_h, x, k=1, clamped=None):
    """
    Sample from RBM with k steps of Gibbs sampling
    
    w_vh: Weights between visible and hidden units (matrix of size DxH)
    w_v: Visible unit biases (column vector of size Dx1)
    w_h: Hidden unit biases (column vector of size Hx1)
    x: Input (column vector of size DxN)
    k: Number of Gibbs steps. Default is 1.
    clamped: If not None, keeps the given elements of x clamped (constant)
        while sampling
        clamped is a two-tuple that gives the start and end indices of clamped elements
    Returns hidden unit and visible unit activations (matrices of size HxN, DxN)
    """
    if clamped is not None:
        cx = x[clamped[0] : clamped[1], :]

    v = x
    for i in range(k):
        # sample hiddens
        ah = gnp.dot(w_vh.T, v) + w_h
        h = gnp.logistic(ah)
        hs = h > gnp.rand(h.shape[0], h.shape[1])

        # sample visibles
        av = gnp.dot(w_vh, hs) + w_v
        v = gnp.logistic(av)

        if clamped is not None:
            v[clamped[0] : clamped[1], :] = cx

    return h, v
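
A possible usage sketch for rbm_sample on a tiny randomly initialised RBM (sizes invented; gnumpy assumed importable as gnp). The second call shows the clamped variant, which holds the first two visible rows fixed while Gibbs sampling the rest:

import numpy as np
import gnumpy as gnp

D, H, N = 6, 4, 3                              # visible units, hidden units, batch size
w_vh = gnp.garray(0.1 * np.random.randn(D, H))
w_v = gnp.zeros((D, 1))
w_h = gnp.zeros((H, 1))

x = gnp.garray((np.random.rand(D, N) > 0.5).astype(float))   # D x N binary input
h, v = rbm_sample(w_vh, w_v, w_h, x, k=5)
print(h.shape, v.shape)                         # H x N and D x N, as in the docstring

# Keep the first two visible rows clamped to their input values while sampling.
h_c, v_c = rbm_sample(w_vh, w_v, w_h, x, k=5, clamped=(0, 2))
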
Example #15
 def fprop(self, inputBatch, weightsToStopBefore=None):
     """
     Perform a (possibly partial) forward pass through the
     network. Updates self.state which, on a full forward pass,
     holds the input followed by each hidden layer's activation and
     finally the net input incident on the output layer. For a full
     forward pass, we return the actual output unit activations. In
     a partial forward pass we return the activations of the last
     layer reached.
     """
     inputBatch = inputBatch if isinstance(
         inputBatch, gnp.garray) else gnp.garray(inputBatch)
     if weightsToStopBefore is None:
         weightsToStopBefore = len(self.weights)
     #self.state holds everything before the output nonlinearity, including the net input to the output units
     self.state = [inputBatch]
     for i in range(min(len(self.weights) - 1, weightsToStopBefore)):
         curActs = self.hidActFuncts[i].activation(
             gnp.dot(self.state[-1], self.weights[i]) + self.biases[i])
         self.state.append(curActs)
     if weightsToStopBefore >= len(self.weights):
         self.state.append(
             gnp.dot(self.state[-1], self.weights[-1]) + self.biases[-1])
         self.acts = self.outputActFunct.activation(self.state[-1])
         return self.acts
     #we didn't reach the output units
     # To return the first set of hidden activations, we would set
     # weightsToStopBefore to 1.
     return self.state[weightsToStopBefore]
Example #16
    def forward(self, X, test=False):
        """
        Feed-forward pass through the model
        X: ('batchsize' x 'context') matrix of word indices
        """
        batchsize = X.shape[0]
        R = self.R
        C = self.C
        bw = self.bw

        # Obtain word features
        tmp = R.as_numpy_array()[:,X.flatten()].flatten(order='F')  # flatten(), default in row-major order, order='F' means Fortran(column-major) order
        tmp = tmp.reshape((batchsize, self.K * self.context))   # reshape(), in row-major order
        words = np.zeros((batchsize, self.K, self.context))
        for i in range(batchsize):
            words[i,:,:] = tmp[i,:].reshape((self.K, self.context), order='F')
        words = gpu.garray(words)

        # Compute the hidden layer (predicted next word representation)
        acts = gpu.zeros((batchsize, self.K))
        for i in range(self.context):
            acts = acts + gpu.dot(words[:,:,i], C[i,:,:]) # for 2-D arrays, dot() is equivalent to matrix multiplication
        acts = gpu.concatenate((acts, gpu.ones((batchsize, 1))), 1)

        # Compute softmax
        preds = gpu.dot(acts, gpu.concatenate((R, bw)))
        preds = gpu.exp(preds - preds.max(1).reshape(batchsize, 1))
        denom = preds.sum(1).reshape(batchsize, 1)
        preds = gpu.concatenate((preds / denom, gpu.ones((batchsize, 1))), 1)

        return (words, acts, preds.as_numpy_array())
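
The order='F' comments above do the real work: flattening column-major and then reshaping regroups the per-word K-dimensional feature vectors by context position. A tiny NumPy illustration of that flatten/reshape behaviour (values invented):

import numpy as np

a = np.arange(6).reshape(2, 3)      # [[0, 1, 2], [3, 4, 5]]
print(a.flatten())                  # row-major (C) order:    [0 1 2 3 4 5]
print(a.flatten(order='F'))         # column-major (Fortran): [0 3 1 4 2 5]
print(a.flatten(order='F').reshape((2, 3), order='F'))   # recovers a exactly
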
Example #17
    def backward(self, Y, preds, acts, words, X):
        """
        Backward pass through the network
        """
        batchsize = preds.shape[0]

        # Compute part of df/dR
        Ix = gpu.garray(preds[:,:-1] - Y) / batchsize
        delta = gpu.dot(acts.T, Ix)
        dR = delta[:-1,:] + self.gamma_r * self.R
        db = delta[-1,:]
        dR = dR.as_numpy_array()

        # Compute df/dC and word inputs for df/dR
        Ix = gpu.dot(Ix, self.R.T)
        dC = gpu.zeros(np.shape(self.C))
        for i in range(self.context):
            delta = gpu.dot(words[:,:,i].T, Ix)
            dC[i,:,:] = delta + self.gamma_c * self.C[i,:,:]
            delta = gpu.dot(Ix, self.C[i,:,:].T)
            delta = delta.as_numpy_array()
            for j in range(X.shape[0]):
                dR[:,X[j,i]] = dR[:,X[j,i]] + delta.T[:,j]

        self.dR = gpu.garray(dR)
        self.db = db
        self.dC = dC
 def get_all_dists(self, query_id):
     v = self.index[query_id] # normalized
     if self._metric == 'angular':
         dists = -gnumpy.dot(self.index, v)
     elif self._metric == 'euclidean':
         dists = self.lengths - 2 * gnumpy.dot(self.index, v)
     return dists.as_numpy_array()
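
Both branches reduce the distance computation to a single matrix-vector product. A NumPy check of why the Euclidean branch is a valid shortcut: ||a - v||^2 = ||a||^2 - 2*a.v + ||v||^2, so lengths - 2*dot differs from the true squared distance only by the constant ||v||^2 and gives the same ranking (toy data, plain NumPy instead of gnumpy):

import numpy as np

rng = np.random.RandomState(1)
index = rng.randn(10, 5)
lengths = (index ** 2).sum(axis=1)           # ||a||^2 for every indexed vector
v = rng.randn(5)

exact = ((index - v) ** 2).sum(axis=1)       # true squared Euclidean distances
shortcut = lengths - 2 * index.dot(v)        # what get_all_dists computes

print(np.allclose(exact, shortcut + v.dot(v)))               # same up to ||v||^2
print((np.argsort(exact) == np.argsort(shortcut)).all())     # identical ranking
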
Example #19
    def get_output(self, input):
        past = []
        # append past_input
        if self.input is not None:
            past.append(self.input)
                
        # set input
        self.input = input

        # append past_hidden
        if self.hidden is not None:
            past.append(self.hidden)

        # set hidden
        if self.hidden is None:
            self.hidden = np.zeros(self.n_hidden)

        #print self.hidden
        self.hidden = self.a_hidden(gnp.dot(self.u.T, self.input) + 
                                    gnp.dot(self.w.T, self.hidden))
        
        # set output
        #print "dot", np.dot(self.v.T, self.hidden)
        self.output = self.a_output(gnp.dot(self.v.T, self.hidden))

        # append past_data (store (self.trun + 1) past's)
        if len(past) != 0:
            self.past_data.append(past)
            if len(self.past_data) > (self.trun + 1):
                self.past_data.pop(0)

        return self.output
Example #20
    def _cd_update_terms(self, vis, model_vis, model_p_vis):
        """Returns (weights update, visible bias update, hidden bias update) given
        visible states from the data vis, visible states sampled from the 
        model model_vis and the probability of the visible units being active         
        from the model."""
        #print "vis.shape:                ", vis.shape
        #print "p_hid(vis).shape:         ", self.p_hid(vis).shape
        #print "model_p_vis.shape:        ", model_p_vis.shape
        #print "p_hid(model_p_vis).shape: ", self.p_hid(model_p_vis).shape
        
        # my update rule:
        #dweights = (gp.dot(vis.T, self.p_hid(vis)) - 
        #            gp.dot(model_p_vis.T, self.p_hid(model_vis)))
        #dbias_vis = gp.sum(vis, axis=0) - gp.sum(model_p_vis, axis=0)
        #dbias_hid = (gp.sum(self.p_hid(vis), axis=0) - 
        #             gp.sum(self.p_hid(model_vis), axis=0))

        # deep learning update rule:
        dweights = (gp.dot(vis.T, self.p_hid_given_vis(vis)) - 
                    gp.dot(model_vis.T, self.p_hid_given_vis(model_vis)))
        dbias_vis = gp.sum(vis, axis=0) - gp.sum(model_vis, axis=0)
        dbias_hid = (gp.sum(self.p_hid_given_vis(vis), axis=0) - 
                     gp.sum(self.p_hid_given_vis(model_vis), axis=0))

        n_samples = vis.shape[0]
        return (dweights / n_samples, 
                dbias_vis / n_samples, 
                dbias_hid / n_samples)
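
The same "deep learning update rule" written with plain NumPy and a made-up logistic p_hid_given_vis, so the three returned terms can be seen in isolation (all shapes illustrative):

import numpy as np

rng = np.random.RandomState(0)
D, H, N = 6, 4, 5
W = 0.1 * rng.randn(D, H)
b_hid = np.zeros(H)

def p_hid_given_vis(vis):
    # logistic hidden-unit probabilities for this sketch
    return 1.0 / (1.0 + np.exp(-(vis.dot(W) + b_hid)))

vis = (rng.rand(N, D) > 0.5).astype(float)         # visibles from the data
model_vis = (rng.rand(N, D) > 0.5).astype(float)   # visibles sampled from the model

dweights = vis.T.dot(p_hid_given_vis(vis)) - model_vis.T.dot(p_hid_given_vis(model_vis))
dbias_vis = vis.sum(axis=0) - model_vis.sum(axis=0)
dbias_hid = p_hid_given_vis(vis).sum(axis=0) - p_hid_given_vis(model_vis).sum(axis=0)

dweights, dbias_vis, dbias_hid = (x / N for x in (dweights, dbias_vis, dbias_hid))
print(dweights.shape, dbias_vis.shape, dbias_hid.shape)   # (D, H), (D,), (H,)
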
def mlpSingleOutput1Layer_costfunc(x, *args):
    inputSize, l1Size, lambda_hidden, inputs, targets = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    inputs = gpu.garray(inputs)
    targets = gpu.garray(targets)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1], (l1Size, inputSize + 1)))
    theta_output = gpu.garray(reshape(x[num_weights_L1:shape(x)[0]], (1, l1Size+1)))
    inputs = gpu.concatenate((gpu.ones((1,numCases)), inputs), axis = 0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_activation_L1 = gpu.concatenate((gpu.ones((1,numCases)), hidden_activation_L1), axis = 0)
    #hidden_activation_L1 = hidden_activation_L1 * dropout_prob
    hidden_sum_output = gpu.dot(theta_output, hidden_activation_L1)
    outputs = hidden_sum_output.logistic()
    output_target_diff = (outputs - targets)**2
    regularized_penalty_output = theta_output[:,1:shape(theta_output)[1]]
    regularized_penalty_output = regularized_penalty_output * regularized_penalty_output
    regularized_penalty_L1 = theta_L1[:,1:shape(theta_L1)[1]]
    regularized_penalty_L1 = regularized_penalty_L1 * regularized_penalty_L1
    cost = gpu.sum(output_target_diff)/(2*numCases) + 0.5 * lambda_hidden*(gpu.sum(regularized_penalty_L1)+gpu.sum(regularized_penalty_output))
    print 'Multilayer Perceptron Cost:', cost
    del inputs
    del theta_L1
    del hidden_sum_L1
    del hidden_activation_L1
    del regularized_penalty_output
    del regularized_penalty_L1
    gpu.free_reuse_cache()
    return cost
    def fpropDropout(self, inputBatch, weightsToStopBefore=None):
        """
		Perform a (possibly partial) forward pass through the
		network. Updates self.state which, on a full forward pass,
		holds the input followed by each hidden layer's activation and
		finally the net input incident on the output layer. For a full
		forward pass, we return the actual output unit activations. In
		a partial forward pass we return None.
		"""
        if weightsToStopBefore is None:
            weightsToStopBefore = len(self.weights)
        #self.state holds everything before the output nonlinearity, including the net input to the output units
        self.state = [
            inputBatch * (gnp.rand(*inputBatch.shape) > self.dropouts[0])
        ]
        for i in range(min(len(self.weights) - 1, weightsToStopBefore)):
            dropoutMultiplier = 1.0 / (1.0 - self.dropouts[i])
            curActs = self.hidActFuncts[i].activation(
                gnp.dot(dropoutMultiplier * self.state[-1], self.weights[i]) +
                self.biases[i])
            self.state.append(
                curActs * (gnp.rand(*curActs.shape) > self.dropouts[i + 1]))
        if weightsToStopBefore >= len(self.weights):
            dropoutMultiplier = 1.0 / (1.0 - self.dropouts[-1])
            self.state.append(
                gnp.dot(dropoutMultiplier * self.state[-1], self.weights[-1]) +
                self.biases[-1])
            self.acts = self.outputActFunct.activation(self.state[-1])
            return self.acts
    def fprop_xf(self, inputBatch, weightsToStopBefore=None):
        """
		Only used during feature dumping after the network has been trained.

		Perform a (possibly partial) forward pass through the
		network. Updates self.state which, on a full forward pass,
		holds the input followed by each hidden layer's activation and
		finally the net input incident on the output layer. Note that state does NOT constrain
		the activation of the output layer. For a full
		forward pass, we return the actual output unit activations. In
		a partial forward pass we return None.
		"""
        if weightsToStopBefore is None:
            weightsToStopBefore = len(self.weights)
        #self.state holds everything before the output nonlinearity, including the net input to the output units
        self.state = [inputBatch]
        for i in range(min(len(self.weights) - 1, weightsToStopBefore)):
            curActs = self.hidActFuncts[i].activation(
                gnp.dot(self.state[-1], self.weights[i]) + self.biases[i])
            self.state.append(curActs)

        if weightsToStopBefore >= len(self.weights):
            self.state.append(
                gnp.dot(self.state[-1], self.weights[-1]) + self.biases[-1])
            self.acts = self.outputActFunct.activation(self.state[-1])
            return self.acts
        #we didn't reach the output units
        # To return the first set of hidden activations, we would set
        # weightsToStopBefore to 1.
        return self.state[weightsToStopBefore]
Example #24
File: dnn.py  Project: C2Tao/HMM
 def update(self):
     self.w *= self.l2reg
     if self.dropout > 0:
         self.w -= gpu.dot((self.x * self.r).T, self.d) * self.learn  # / self.q
     else:
         self.w -= gpu.dot(self.x.T, self.d) * self.learn  # / self.q
     self.b *= self.l2reg
     self.b -= gpu.sum(self.d, 0) * self.learn
Example #25
 def energy(self, vis, hid):
     assert hid.ndim == 2
     #return (vis * self.vbias[nax, :]).sum(1) + \
     #       (hid * self.hbias[nax, :]).sum(1) + \
     #           (vis[:, :, nax] * self.weights[nax, :, :] * hid[:, nax, :]).sum(2).sum(1)
     return gnp.dot(vis, self.vbias) + \
            gnp.dot(hid, self.hbias) + \
            gnp.sum(vis * gnp.dot(hid, self.weights.T), 1)
    def fobos_nn(self, w):
        nu = self.tau * self.lr
        u, s, vt = linalg.svd(w, full_matrices=0, compute_uv=1)
        sdash = np.maximum(s - nu, 0)
        sdashzeros = np.diag(sdash)
#       sdashzeros = np.zeros(u.shape, dtype=np.float)
#       sdashzeros[:sdashtemp.shape[0], :sdashtemp.shape[1]] = sdashtemp
        return gnp.dot(gnp.garray(u), gnp.dot(gnp.garray(sdashzeros), gnp.garray(vt))).as_numpy_array(), s
 def fobos_nn(self, w):
     nu = self.tau * self.lr
     u, s, vt = randomized_svd(w, w.shape[0])
     sdash = np.maximum(s - nu, 0)
     sdashtemp = np.diag(sdash)
     sdashzeros = np.zeros(u.shape, dtype=np.float)
     sdashzeros[:sdashtemp.shape[0], :sdashtemp.shape[1]] = sdashtemp
     return gnp.dot(gnp.garray(u), gnp.dot(gnp.garray(sdashzeros), gnp.garray(vt))).as_numpy_array(), s
Example #29
def run_gnumpy(a, b):
    st_g = time()
    len_a = gnumpy.dot(a, a)
    len_b = gnumpy.dot(b, b)
    res = gnumpy.dot(a, b) / (len_a * len_b)
    et_g = time()
    print res
    return et_g - st_g
def grad_costfunc_gpu_ReLU(x, *args):
    num_input, num_hidden, num_output, inputs, lambda_val, sparsityParam, beta = args
    num_weights1 = (num_input + 1) * num_hidden
    num_weights2 = (num_hidden + 1) * num_output
    x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    weights1 = x[0:num_weights1].reshape((num_hidden, num_input + 1))
    weights2 = x[num_weights1:shape(x)[0]].reshape(
        (num_output, num_hidden + 1))
    nData = shape(inputs)[1]
    data = gpu.concatenate((gpu.ones((1, nData)), inputs), axis=0)
    hidden_sum = gpu.dot(weights1, data)
    #hidden_activation = gpu.log(1+hidden_sum.exp())
    relu_mask_hidden1 = gpu.ones(shape(hidden_sum)) * (hidden_sum > 0)
    hidden_activation = hidden_sum * relu_mask_hidden1
    #hidden_derivative = hidden_sum.logistic()
    hidden_derivative = relu_mask_hidden1
    hidden_activation = gpu.concatenate((gpu.ones(
        (1, nData)), hidden_activation),
                                        axis=0)
    hidden_derivative = gpu.concatenate((gpu.ones(
        (1, nData)), hidden_derivative),
                                        axis=0)
    outputs = gpu.dot(weights2, hidden_activation)
    weights1_grad = gpu.zeros(shape(weights1))
    weights2_grad = gpu.zeros(shape(weights2))
    p = outputs - inputs
    weights2_grad += gpu.dot(
        p, gpu.garray(transpose(hidden_activation.as_numpy_array())))
    q_temp = gpu.dot(gpu.garray(transpose(weights2.as_numpy_array())), p)
    #q = multiply(multiply(q_temp,hidden_activation),(1-hidden_activation))
    q = q_temp * hidden_derivative
    delta2 = gpu.dot(q, gpu.garray(transpose(data.as_numpy_array())))
    weights1_grad += delta2[1:shape(delta2)[0], :]
    weights1_grad = weights1_grad / nData
    weights2_grad = weights2_grad / nData
    weights1_grad[:, 1:shape(weights1_grad)[1]] = weights1_grad[:, 1:shape(
        weights1_grad)[1]] + weights1[:, 1:shape(weights1)[1]] * lambda_val
    weights2_grad[:, 1:shape(weights2_grad)[1]] = weights2_grad[:, 1:shape(
        weights2_grad)[1]] + weights2[:, 1:shape(weights2)[1]] * lambda_val
    #weights1_grad = reshape(weights1_grad, num_weights1)
    weights1_grad = weights1_grad.reshape(num_weights1)
    #weights2_grad = reshape(weights2_grad, num_weights2)
    weights2_grad = weights2_grad.reshape(num_weights2)
    del x
    del inputs
    del data
    del p
    del q_temp
    del q
    del delta2
    del hidden_sum
    del hidden_activation
    del weights1
    del weights2
    gpu.free_reuse_cache()
    return hstack(
        (weights1_grad.as_numpy_array(), weights2_grad.as_numpy_array()))
def mlpSoftmax1Layer_grad(x, *args):
    numClasses, inputSize, l1Size, lambda_softmax, lambda_hidden, inputs, groundTruth = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_softmax = numClasses * l1Size
    inputs = gpu.garray(inputs)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1],
                                  (l1Size, inputSize + 1)))
    theta_softmax = gpu.garray(
        reshape(x[num_weights_L1:shape(x)[0]], (numClasses, l1Size)))
    theta_L1_grad = gpu.zeros(shape(theta_L1))
    inputs = gpu.concatenate((gpu.ones((1, numCases)), inputs), axis=0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    #hidden_activation_L1 = gpu.log(1+hidden_sum_L1.exp())
    #hidden_derivative_L1 = hidden_sum_L1.logistic()
    relu_mask_hidden1 = gpu.ones(shape(hidden_sum_L1)) * (hidden_sum_L1 > 0)
    hidden_activation_L1 = hidden_sum_L1 * relu_mask_hidden1
    #hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_derivative_L1 = relu_mask_hidden1
    hidden_sum_softmax_imd = gpu.dot(theta_softmax, hidden_activation_L1)
    hidden_sum_softmax = hidden_sum_softmax_imd - hidden_sum_softmax_imd.max(
        axis=0)
    predictions = hidden_sum_softmax.exp()
    predictions = predictions / gpu.sum(predictions, axis=0)
    softmax_imd = groundTruth - predictions
    theta_softmax_grad = -1 * gpu.dot(
        softmax_imd,
        gpu.garray(transpose(hidden_activation_L1.as_numpy_array()))
    ) / numCases + lambda_softmax * theta_softmax
    deltaOut = -softmax_imd
    delta_L1_imd = gpu.dot(
        gpu.garray(transpose(theta_softmax.as_numpy_array())), deltaOut)
    delta_L1_imd2 = delta_L1_imd * hidden_derivative_L1
    #delta_L1_imd2 = (delta_L1_imd*hidden_activation_L1)*(1-hidden_activation_L1)
    delta_L1 = gpu.dot(delta_L1_imd2,
                       gpu.garray(transpose(inputs.as_numpy_array())))
    theta_L1_grad += delta_L1
    theta_L1_grad = theta_L1_grad / numCases
    theta_L1_grad[:, 1:shape(theta_L1_grad)[1]] = theta_L1_grad[:, 1:shape(
        theta_L1_grad)[1]] + theta_L1[:, 1:shape(theta_L1)[1]] * lambda_hidden
    theta_L1_grad = reshape(theta_L1_grad.as_numpy_array(), num_weights_L1)
    theta_softmax_grad = reshape(theta_softmax_grad.as_numpy_array(),
                                 num_weights_softmax)
    del inputs
    del theta_L1
    del theta_softmax
    del hidden_sum_L1
    del hidden_activation_L1
    del hidden_sum_softmax
    del predictions
    del softmax_imd
    del deltaOut
    del delta_L1_imd
    del delta_L1_imd2
    del delta_L1
    gpu.free_reuse_cache()
    return hstack((theta_L1_grad, theta_softmax_grad))
Example #32
 def update(self):
     self.w *= self.l2reg
     if self.dropout > 0:
         self.w -= gpu.dot(
             (self.x * self.r).T, self.d) * self.learn  # / self.q
     else:
         self.w -= gpu.dot(self.x.T, self.d) * self.learn  # / self.q
     self.b *= self.l2reg
     self.b -= gpu.sum(self.d, 0) * self.learn
Example #33
 def input_to_hidden(self, set_name = 'train'):
     self.timer_logger('input_to_hidden {0}'.format(type), time.time()) 
     self.results['activations'] = []     
     if set_name == 'train':            
         self.results['activations'].append([self.batch, self.w[0], self.b[0]])   
         dropped_out = self.batch * (gpu.rand(self.current_batch_size,self.X.shape[1]) > self.dropout[0]) 
         self.results['current']  = gpu.dot(dropped_out,self.w[0])+self.b[0]
     else:                               
         self.results['current'] = gpu.dot(self.batch,self.w[0]) + self.b[0]
     self.timer_logger('input_to_hidden {0}'.format(type), time.time()) 
Example #34
def mlpSoftmax_costfunc(x, *args):
    numClasses, inputSize, l1Size, l2Size, lambda_softmax, lambda_hidden, inputs, labels, groundTruth = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_L2 = l2Size * (l1Size + 1)
    #x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1],
                                  (l1Size, inputSize + 1)))
    #theta_L1 = x[0:num_weights_L1].reshape((l1Size, inputSize + 1))
    #print numClasses, l2Size
    theta_L2 = gpu.garray(
        reshape(x[num_weights_L1:num_weights_L2 + num_weights_L1],
                (l2Size, l1Size + 1)))
    #theta_L2 = x[num_weights_L1:num_weights_L2+num_weights_L1].reshape((l2Size, l1Size + 1))
    theta_softmax = gpu.garray(
        reshape(x[num_weights_L2 + num_weights_L1:shape(x)[0]],
                (numClasses, l2Size)))
    #theta_softmax = x[num_weights_L2+num_weights_L1:shape(x)[0]].reshape((numClasses, l2Size))
    inputs = gpu.concatenate((gpu.ones((1, numCases)), inputs), axis=0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_activation_L1 = gpu.concatenate((gpu.ones(
        (1, numCases)), hidden_activation_L1),
                                           axis=0)
    hidden_sum_L2 = gpu.dot(theta_L2, hidden_activation_L1)
    hidden_activation_L2 = hidden_sum_L2.logistic()
    hidden_sum_softmax = gpu.dot(theta_softmax, hidden_activation_L2)
    hidden_sum_softmax = hidden_sum_softmax - hidden_sum_softmax.max(axis=0)
    predictions = hidden_sum_softmax.exp()
    predictions = predictions / gpu.sum(predictions, axis=0)
    temp = groundTruth * gpu.log(predictions)
    regularized_penalty_L1 = theta_L1[:, 1:shape(theta_L1)[1]]
    regularized_penalty_L2 = theta_L2[:, 1:shape(theta_L2)[1]]
    regularized_penalty_L1 = regularized_penalty_L1 * regularized_penalty_L1
    regularized_penalty_L2 = regularized_penalty_L2 * regularized_penalty_L2
    cost = -1 * gpu.sum(temp) / numCases + 0.5 * lambda_hidden * (
        gpu.sum(regularized_penalty_L1) + gpu.sum(regularized_penalty_L2)
    ) + 0.5 * lambda_softmax * gpu.sum(theta_softmax * theta_softmax)
    print 'Multilayer Softmax Cost:', cost
    del inputs
    del theta_L1
    del theta_L2
    del theta_softmax
    del hidden_sum_L1
    del hidden_activation_L1
    del hidden_sum_L2
    del hidden_activation_L2
    del hidden_sum_softmax
    del predictions
    del temp
    del regularized_penalty_L1
    del regularized_penalty_L2
    gpu.free_reuse_cache()
    return cost
Example #35
    def feedforward(self, train_set_x):
        self.activations = []

        self.activations.append(train_set_x)

        for i in range(self.n_layers):
            current_activations = gnp.tanh(gnp.dot(self.activations[i], self.W_params[i]) + self.b_params[i])
            self.activations.append(current_activations)

        #output layers
        self.final_layer_output = gnp.dot(self.activations[self.n_layers], self.W_params[self.n_layers]) + self.b_params[self.n_layers]
Example #36
    def parameter_prediction(self, test_set_x):
        test_set_x = gnp.as_garray(test_set_x)

        current_activations = test_set_x

        for i in range(self.n_layers):
            current_activations = gnp.tanh(gnp.dot(current_activations, self.W_params[i]) + self.b_params[i])

        final_layer_output = gnp.dot(current_activations, self.W_params[self.n_layers]) + self.b_params[self.n_layers]

        return  final_layer_output.as_numpy_array()
def nn_forward_pass(x, w, b, return_all=True):
    """
    Forward pass for multilayer feed-forward sigmoid neural network
    
    Hidden units have sigmoid non-linearity. 
    Output is soft-max.

    x: DxN matrix of input data
    w: Weights. List of weight matrices for each layer.
    b: Biases. List of bias vectors for each layer
    return_all: If True, returns hidden unit activations for each layer. If False
        just returns the output layer activations
    Returns a list h where each element is a matrix containing the activations
    for that layer. h[0] is input data x. 
    """
    # ---- TEMP HACK --------------
    # I should find a more seamless way of running in mixed (some operations
    # with numpy, some with gnumpy) mode.
    # I had to resort to this, because i needed the validation classification
    # step in nn_train to run on CPU with numpy. GPU ran out of memory.
    if isinstance(x, gnp.garray):
        use_gpu = True
    else:
        use_gpu = False

    layer_count = len(w)
    if return_all:
        hs = [x]  # unit activations for each layer
    h = x

    # all layers except the output layer
    for l in range(layer_count - 1):
        if use_gpu:
            a = gnp.dot(w[l].T, h) + b[l]
            h = gnp.logistic(a)
        else:
            a = np.dot(gnp.as_numpy_array(w[l]).T, h) + gnp.as_numpy_array(b[l])
            h = 1.0 / (1 + np.exp(-a))
        if return_all:
            hs.append(h)

    # output layer
    if use_gpu:
        h = gnp.dot(w[-1].T, h) + b[-1]
        h = gnp.exp(h) / gnp.sum(gnp.exp(h), axis=0)  # soft-max
    else:
        h = np.dot(gnp.as_numpy_array(w[-1]).T, h) + gnp.as_numpy_array(b[-1])
        h = np.exp(h) / np.sum(np.exp(h), axis=0)  # soft-max

    if return_all:
        hs.append(h)
        return hs
    else:
        return h
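
A possible usage sketch for nn_forward_pass in the mixed mode the comment above describes: weights and biases as gnumpy garrays, input as a plain NumPy array so the CPU branch runs (sizes invented); the biases are column vectors so they broadcast across the batch:

import numpy as np
import gnumpy as gnp

D, H, K, N = 8, 5, 3, 4                       # input dim, hidden units, classes, batch
w = [gnp.garray(0.1 * np.random.randn(D, H)),
     gnp.garray(0.1 * np.random.randn(H, K))]
b = [gnp.garray(np.zeros((H, 1))), gnp.garray(np.zeros((K, 1)))]

x = np.random.rand(D, N)                      # plain NumPy input -> CPU branch
hs = nn_forward_pass(x, w, b)
print([h.shape for h in hs])                  # [(D, N), (H, N), (K, N)]
print(hs[-1].sum(axis=0))                     # soft-max columns sum to 1
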
Example #40
    def backward_pass(self, state, dOX, R_HX=None, mu_times_lambda=0.):
        """
The backward pass (or the L-op). Given the gradients wrt the output
units and the state, compute the implied derivative wrt the parameters.
If R_HX is given, then structural damping will be added. 
"""

        V, H, OX = state
        if V[0] is not None:
            V = [None] + V
        if OX[0] is not None:
            OX = [None] + OX
        if dOX[0] is not None:
            dOX = [None] + dOX

            
        assert V[0] is None
        T = len(V)-1

        grad = self.unpack(self.pack() * 0)


        dH_1t = H[-1] * 0 
        for t in reversed(range(1, T+1)):

            dH_t = dH_1t 

            dH_t += g.dot(dOX[t], self.W_ho.T) 
            grad.W_ho += g.dot(H[t].T, dOX[t])


            
            ## backpropagate the nonlinearity: at this point, dHX_t, the gradient
            ## wrt the total inputs to H_t, is correct.
            dHX_t = dH_t * self.hid_nonlin.grad_y(H[t])

            ## THIS IS THE ONLY LINE THAT HAS ANYTHING TO DO WITH STRUCTURAL
            ## DAMPING. Pretty cool :-)
            if R_HX is not None:
                dHX_t += float(mu_times_lambda) * \
                    self.struct_damp_nonlin.H_prod(R_HX[t], H[t], 1)


            dH_1t = g.dot(dHX_t, self.W_hh.T)

            grad.W_hh += g.dot(H[t-1].T, dHX_t)
            grad.W_vh += g.dot(V[t].T, dHX_t)



        grad.h_init += dH_1t.sum(0)

        return grad
def dbn_supervised_predict_exact(ws_vh, ws_v, ws_h, x):
    """
    Predict the class label of input x from supervised DBN
    Uses the exact method mentioned in section 6.2 of Hinton, Osindero, Teh 2006
    The free energy formula is taken from http://deeplearning.net/tutorial/rbm.html
    
    x: Input data. (NxD matrix)
    """
    L = len(ws_vh)
    N = x.shape[0]

    # make a forward pass to get from input layer to visible layer of top level
    # RBM
    h_prev = x.T

    # forward (bottom-up) pass, (use deterministic (we pass the activations, not
    # the stochastically sampled steps) forward pass)
    for l in range(L - 1):
        ah = gnp.dot(ws_vh[l].T, h_prev) + ws_h[l]
        h_prev = gnp.logistic(ah)

    H = ws_vh[-1].shape[0]  # number of visible units top level RBM
    Hx = h_prev.shape[0]  # number of hidden units in the penultimate layer
    K = H - Hx
    # (H - Hx) is the number of supervised inputs to top level RBM

    # for every class, assume it is the correct label and calculate its free energy
    y = gnp.zeros((K, N))
    free_energy = gnp.zeros((N, K))  # we actually calculate -free_energy
    for k in range(K):
        # set the current assumed class label
        y[k, :] = 1.0

        # visible unit vector
        v = gnp.concatenate((y, h_prev))
        e_v = gnp.dot(ws_v[-1].T, v)  # bias energy term

        ah = gnp.dot(ws_vh[-1].T, v) + ws_h[-1]
        e_h = gnp.sum(gnp.log(gnp.exp(ah) + 1.0), axis=0)

        free_energy[:, k] = e_v + e_h

        # zero the class labels for next iteration
        y[:, :] = 0.0

    # since these numbers may get pretty small, use the sum-exp trick for converting
    # these to probabilities
    pred_y = (
        gnp.exp(free_energy - gnp.max(free_energy, axis=1)[:, gnp.newaxis])
        / gnp.sum(gnp.exp(free_energy - gnp.max(free_energy, axis=1)[:, gnp.newaxis]), axis=1)[:, gnp.newaxis]
    )

    return pred_y
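
The final normalisation is the usual shift-invariance of softmax (the "sum-exp trick" in the comment): subtracting the per-row maximum before exponentiating prevents overflow without changing the probabilities. A quick NumPy illustration with exaggerated toy values:

import numpy as np

F = np.array([[800.0, 798.0, 790.0],        # large "free energies" that overflow exp()
              [-5.0, 0.0, 3.0]])

naive = np.exp(F) / np.exp(F).sum(axis=1, keepdims=True)      # first row becomes nan
shifted = np.exp(F - F.max(axis=1, keepdims=True))
stable = shifted / shifted.sum(axis=1, keepdims=True)

print(naive[0])            # [nan nan nan] after an overflow warning
print(stable.sum(axis=1))  # [1. 1.] -- well-defined probabilities in every row
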
Example #42
def exact_moments(rbm, batch_units=10, show_progress=False):
    expect_vis = gnp.zeros(rbm.nvis)
    expect_hid = gnp.zeros(rbm.nhid)
    expect_prod = gnp.zeros((rbm.nvis, rbm.nhid))

    for hid, p in iter_configurations(rbm, batch_units=batch_units, show_progress=show_progress):
        cond_vis = gnp.logistic(rbm.vis_inputs(hid))
        expect_vis += gnp.dot(p, cond_vis)
        expect_hid += gnp.dot(p, hid)
        expect_prod += gnp.dot(cond_vis.T * p, hid)

    return binary_rbms.Moments(expect_vis, expect_hid, expect_prod)
Example #43
 def fprop(self, inputBatch, weightsToStopBefore = None ):
     inputBatch = inputBatch if isinstance(inputBatch, gnp.garray) else gnp.garray(inputBatch)
     if weightsToStopBefore is None:
         weightsToStopBefore = len(self.weights)
     self.state = [inputBatch]
     for i in range(min(len(self.weights) - 1, weightsToStopBefore)):
         curActs = self.hidActFuncts[i].activation(gnp.dot(self.state[-1], self.weights[i]) + self.biases[i])
         self.state.append(curActs)
     if weightsToStopBefore >= len(self.weights):
         self.state.append(gnp.dot(self.state[-1], self.weights[-1]) + self.biases[-1])
         self.acts = self.outputActFunct.activation(self.state[-1])
         return self.acts
     return self.state[weightsToStopBefore]
Example #44
def CD1(vis, visToHid, visBias, hidBias, visUnit = Binary(), hidUnit = Binary()):

    posHid = hidUnit.activate(gnp.dot(vis, visToHid) + hidBias)
    posHidStates = hidUnit.sampleStates(posHid)
    
    negVis = visUnit.activate(gnp.dot(posHidStates, visToHid.T) + visBias)
    negHid = hidUnit.activate(gnp.dot(negVis, visToHid) + hidBias)
    
    visHidStats = gnp.dot(vis.T, posHid) - gnp.dot(negVis.T, negHid)
    visBiasStats = vis.sum(axis=0).reshape(*visBias.shape) - negVis.sum(axis=0).reshape(*visBias.shape)
    hidBiasStats = posHid.sum(axis=0).reshape(*hidBias.shape) - negHid.sum(axis=0).reshape(*hidBias.shape)

    return visHidStats, hidBiasStats, visBiasStats, negVis
def costfunc_gpu(x, *args):
    num_input, num_hidden, num_output, inputs, noNoiseData, lambda_val, sparsityParam, beta = args
    num_weights1 = (num_input + 1) * num_hidden
    x = gpu.garray(x)
    #    randomNoise = random.random_sample(shape(inputs))
    #    criteriaTable = randomNoise > 0.32
    #    inputs = inputs * criteriaTable
    inputs = gpu.garray(inputs)
    noNoiseData = gpu.garray(noNoiseData)
    #weights1 = gpu.garray(reshape(x[0:num_weights1],(num_hidden,num_input+1)))
    weights1 = x[0:num_weights1].reshape((num_hidden, num_input + 1))
    #weights2 = gpu.garray(reshape(x[num_weights1:shape(x)[0]], (num_output,num_hidden+1)))
    weights2 = x[num_weights1:shape(x)[0]].reshape(
        (num_output, num_hidden + 1))
    nData = shape(inputs)[1]
    data = gpu.concatenate((gpu.ones((1, nData)), inputs), axis=0)
    hidden_sum = gpu.dot(weights1, data)
    hidden_activation = hidden_sum.logistic()
    p_avg = gpu.sum(hidden_activation, axis=1) / nData
    hidden_activation = gpu.concatenate((gpu.ones(
        (1, nData)), hidden_activation),
                                        axis=0)
    output = gpu.dot(weights2, hidden_activation)
    regularized_penalty1 = weights1[:, 1:shape(weights1)[1]]
    regularized_penalty2 = weights2[:, 1:shape(weights2)[1]]
    regularized_penalty1 = regularized_penalty1 * regularized_penalty1
    regularized_penalty2 = regularized_penalty2 * regularized_penalty2
    output_target_diff = (output - noNoiseData) * (output - noNoiseData)
    KL = gpu.sum(sparsityParam * gpu.log(sparsityParam / p_avg) +
                 (1 - sparsityParam) * gpu.log((1 - sparsityParam) /
                                               (1 - p_avg)))
    cost = gpu.sum(output_target_diff) / (2 * nData) + 0.5 * lambda_val * (
        gpu.sum(regularized_penalty1) +
        gpu.sum(regularized_penalty2)) + beta * KL
    print 'GPU Linear Denoising Decoder Cost: ', cost
    del x
    del inputs
    del noNoiseData
    del data
    del hidden_sum
    del hidden_activation
    del p_avg
    del output
    del regularized_penalty1
    del regularized_penalty2
    del weights1
    del weights2
    del output_target_diff
    gpu.free_reuse_cache()
    return cost
Example #46
def grad_costfunc_gpu(x, *args):
    num_input,num_hidden,num_output,inputs,lambda_val,sparsityParam,beta = args
    num_weights1 = (num_input+1)*num_hidden
    num_weights2 = (num_hidden+1)*num_output
    x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    weights1 = x[0:num_weights1].reshape((num_hidden,num_input+1))
    weights2 = x[num_weights1:shape(x)[0]].reshape((num_output,num_hidden+1))
    nData = shape(inputs)[1]
    data = gpu.concatenate((gpu.ones((1,nData)), inputs), axis = 0)
    hidden_sum = gpu.dot(weights1, data)
    hidden_activation = hidden_sum.logistic()
    p_avg = gpu.sum(hidden_activation,axis=1)/nData
    grad_sparse = -1*sparsityParam/p_avg.as_numpy_array() + (1-sparsityParam)/(1-p_avg.as_numpy_array())
    grad_sparse = append(0,grad_sparse)
    grad_sparse = tile(grad_sparse, (nData, 1))
    grad_sparse = gpu.garray(transpose(grad_sparse))
    hidden_activation = gpu.concatenate((gpu.ones((1,nData)), hidden_activation), axis = 0)
    outputs = gpu.dot(weights2, hidden_activation)
    weights1_grad = gpu.zeros(shape(weights1))
    weights2_grad = gpu.zeros(shape(weights2))
    p = outputs-inputs
    weights2_grad += gpu.dot(p, gpu.garray(transpose(hidden_activation.as_numpy_array())))
    q_temp = gpu.dot(gpu.garray(transpose(weights2.as_numpy_array())),p) + beta*grad_sparse
    #q = multiply(multiply(q_temp,hidden_activation),(1-hidden_activation))
    q = (q_temp*hidden_activation)*(1-hidden_activation)
    delta2 = gpu.dot(q, gpu.garray(transpose(data.as_numpy_array())))
    weights1_grad += delta2[1:shape(delta2)[0], :]
    weights1_grad = weights1_grad/nData
    weights2_grad = weights2_grad/nData
    weights1_grad[:,1:shape(weights1_grad)[1]] = weights1_grad[:,1:shape(weights1_grad)[1]] + weights1[:,1:shape(weights1)[1]] * lambda_val
    weights2_grad[:,1:shape(weights2_grad)[1]] = weights2_grad[:,1:shape(weights2_grad)[1]] + weights2[:,1:shape(weights2)[1]] * lambda_val
    #weights1_grad = reshape(weights1_grad, num_weights1)
    weights1_grad = weights1_grad.reshape(num_weights1)
    #weights2_grad = reshape(weights2_grad, num_weights2)
    weights2_grad = weights2_grad.reshape(num_weights2)
    del x
    del inputs
    del data
    del grad_sparse
    del p
    del q_temp
    del q
    del delta2
    del hidden_sum
    del hidden_activation
    del weights1
    del weights2
    gpu.free_reuse_cache()
    return hstack((weights1_grad.as_numpy_array(),weights2_grad.as_numpy_array()))
Example #47
 def input_to_hidden(self, set_name='train'):
     self.timer_logger('input_to_hidden {0}'.format(type), time.time())
     self.results['activations'] = []
     if set_name == 'train':
         self.results['activations'].append(
             [self.batch, self.w[0], self.b[0]])
         dropped_out = self.batch * (gpu.rand(
             self.current_batch_size, self.X.shape[1]) > self.dropout[0])
         self.results['current'] = gpu.dot(dropped_out,
                                           self.w[0]) + self.b[0]
     else:
         self.results['current'] = gpu.dot(self.batch,
                                           self.w[0]) + self.b[0]
     self.timer_logger('input_to_hidden {0}'.format(type), time.time())
 def fpropDropout(self,
                  inputBatch,
                  useDropout=False,
                  weightsToStopBefore=None):
     """
     Perform a (possibly partial) forward pass through the
     network. Updates self.state which, on a full forward pass,
     holds the input followed by each hidden layer's activation and
     finally the net input incident on the output layer. For a full
      forward pass, we return the actual output unit activations. In
      a partial forward pass we return the activations of the last
      layer reached.
      If useDropout == True, randomly drop units for each layer.
     """
     inputBatch = inputBatch if isinstance(
         inputBatch, gnp.garray) else gnp.garray(inputBatch)
      if weightsToStopBefore is None:
         weightsToStopBefore = len(self.weights)
     self.keptMask = [gnp.rand(*inputBatch.shape) > self.dropouts[0]]
     #self.state holds everything before the output nonlinearity, including the net input to the output units
     self.state = [inputBatch * self.keptMask[0]]
     for i in range(min(len(self.weights) - 1, weightsToStopBefore)):
         if useDropout:
             dropoutMultiplier = 1.0 / (1.0 - self.dropouts[i])
             curActs = self.hidActFuncts[i].activation(
                 gnp.dot(dropoutMultiplier *
                         self.state[-1], self.weights[i]) + self.biases[i])
             self.keptMask.append(
                 gnp.rand(*curActs.shape) > self.dropouts[i + 1])
             self.state.append(curActs * self.keptMask[-1])
         else:
             curActs = self.hidActFuncts[i].activation(
                 gnp.dot(self.state[-1], self.weights[i]) + self.biases[i])
             self.state.append(curActs)
     if weightsToStopBefore >= len(self.weights):
         if useDropout:
             dropoutMultiplier = 1.0 / (1.0 - self.dropouts[-1])
             self.state.append(
                 gnp.dot(dropoutMultiplier *
                         self.state[-1], self.weights[-1]) +
                 self.biases[-1])
         else:
             self.state.append(
                 gnp.dot(self.state[-1], self.weights[-1]) +
                 self.biases[-1])
         self.acts = self.outputActFunct.activation(self.state[-1])
         return self.acts
     # If we didn't reach the output units
     # To return the first set of hidden activations, we would set
     # weightsToStopBefore to 1.
     return self.state[weightsToStopBefore]
Example #49
 def fpropDropout(self, inputBatch, weightsToStopBefore = None ):
     inputBatch = inputBatch if isinstance(inputBatch, gnp.garray) else gnp.garray(inputBatch)
     if weightsToStopBefore is None:
         weightsToStopBefore = len(self.weights)
     self.state = [inputBatch * (gnp.rand(*inputBatch.shape) > self.dropouts[0])]
     for i in range(min(len(self.weights) - 1, weightsToStopBefore)):
         dropoutMultiplier = 1.0/(1.0-self.dropouts[i])
         curActs = self.hidActFuncts[i].activation(gnp.dot(dropoutMultiplier*self.state[-1], self.weights[i]) + self.biases[i])
         self.state.append(curActs * (gnp.rand(*curActs.shape) > self.dropouts[i+1]) )
     if weightsToStopBefore >= len(self.weights):
         dropoutMultiplier = 1.0/(1.0-self.dropouts[-1])
         self.state.append(gnp.dot(dropoutMultiplier*self.state[-1], self.weights[-1]) + self.biases[-1])
         self.acts = self.outputActFunct.activation(self.state[-1])
         return self.acts
     return self.state[weightsToStopBefore]
Example #50
 def hidden_to_output(self, set_name = 'train'):   
     self.timer_logger('hidden_to_output {0}'.format(type), time.time()) 
     i = 0   
     for weight, bias in zip(self.w, self.b):
         if i > 0: #ignore the first weight that goes from inputs to first hidden layer
              if set_name == 'train':
                  self.results['activations'].insert(0, [self.activation(self.results['current']), weight])
                  # dropout: keep each hidden unit with probability 1 - self.dropout[1]
                  mask = gpu.rand(self.results['activations'][0][0].shape[0],
                                  self.results['activations'][0][0].shape[1]) > self.dropout[1]
                  self.results['current'] = gpu.dot(self.results['activations'][0][0] * mask, weight) + bias
              else:
                  # at test time, scale activations by the keep probability instead of dropping
                  self.results['current'] = gpu.dot(self.activation(self.results['current']) * (1 - self.dropout[1]), weight) + bias
       
         i += 1
     self.timer_logger('hidden_to_output {0}'.format(set_name), time.time())
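Unlike Exemple #49, this snippet drops units without rescaling during training and instead multiplies activations by (1 - dropout) at test time. Both conventions target the same expected pre-activation; a small illustrative NumPy check (act, W and p are made-up names and values):

import numpy as np

np.random.seed(1)
act = np.random.rand(4, 8)          # hidden activations
W = np.random.randn(8, 3)
p = 0.3                             # drop probability, like self.dropout[1]

# training-time pre-activation, averaged over many dropout masks
trials = [((np.random.rand(*act.shape) > p) * act).dot(W) for _ in range(20000)]
train_avg = np.mean(trials, axis=0)

# test-time pre-activation with the (1 - p) scaling used above
test = (act * (1.0 - p)).dot(W)
print(np.abs(train_avg - test).max())   # small, and shrinks as the number of trials grows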
Exemple #51
0
    def R_forward_pass(self, state, R):
        # Apply the R-operator (forward-mode directional derivative) through the
        # network; R carries the direction (R.W, R.b) the Jacobian is multiplied by.
        self.R_state_X = R_state_X = [None] * len(self.sizes)

        R_state_X[0] = state[0]*0   # the input does not depend on the parameters, so its R-value is zero

        R_state_i = R_state_X[0]
        for i in range(len(self.sizes) - 1):
            R_state_X[i+1] =  g.dot(state[i], R.W[i]) + \
                              g.dot(R_state_i, self.W[i]) + R.b[i]

            R_state_i = self.nonlins[i].grad_y(state[i+1]) * R_state_X[i+1]

        return R_state_X[-1]
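R_forward_pass is a forward application of the R-operator (Pearlmutter's trick): it propagates the directional derivative of each layer's net input and activation along a parameter direction R. A self-contained NumPy sketch of the same recursion for a small tanh network, verified against a central finite difference; everything here (forward, r_forward, the sizes) is illustrative rather than part of the library:

import numpy as np

def forward(x, Ws, bs):
    states = [x]
    for i, (W, b) in enumerate(zip(Ws, bs)):
        a = states[-1].dot(W) + b
        states.append(np.tanh(a) if i < len(Ws) - 1 else a)   # linear output layer
    return states

def r_forward(states, Ws, Vs, cs):
    r_s = np.zeros_like(states[0])            # the input does not depend on the parameters
    for i, (W, V, c) in enumerate(zip(Ws, Vs, cs)):
        r_a = states[i].dot(V) + r_s.dot(W) + c        # product rule on dot(state, W) + b
        r_s = (1.0 - states[i + 1] ** 2) * r_a if i < len(Ws) - 1 else r_a   # tanh'(a) via the output
    return r_s

np.random.seed(0)
sizes = [5, 7, 3]
Ws = [np.random.randn(m, n) for m, n in zip(sizes[:-1], sizes[1:])]
bs = [np.random.randn(n) for n in sizes[1:]]
Vs = [np.random.randn(*W.shape) for W in Ws]   # the direction to multiply the Jacobian by
cs = [np.random.randn(*b.shape) for b in bs]
x = np.random.randn(4, sizes[0])

jv = r_forward(forward(x, Ws, bs), Ws, Vs, cs)
eps = 1e-6
plus = forward(x, [W + eps * V for W, V in zip(Ws, Vs)], [b + eps * c for b, c in zip(bs, cs)])[-1]
minus = forward(x, [W - eps * V for W, V in zip(Ws, Vs)], [b - eps * c for b, c in zip(bs, cs)])[-1]
print(np.abs(jv - (plus - minus) / (2 * eps)).max())    # agrees to ~1e-8 or better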
Exemple #52
0
def test_gnumpy(dat, num_epochs):
    import gnumpy as gpu
    import numpy
    import time
    # load data. <dat> is 2 dimensional: 60000 X 784
    #dat = gpu.garray(load('mnist_cudaTest').T/255.)
    # training parameters
    epsilon = 0.1
    momentum = 0.9
    batch_size = 128
    num_batches = dat.shape[0] / batch_size
    # model parameters
    num_vis = dat.shape[1]
    num_hid = 4096
    # initialize weights
    w_vh = 0.1 * gpu.randn(num_vis, num_hid)
    w_v = gpu.zeros(num_vis)
    w_h = -4. * gpu.ones(num_hid)
    # initialize weight updates
    wu_vh = gpu.zeros((num_vis, num_hid))
    wu_v = gpu.zeros(num_vis)
    wu_h = gpu.zeros(num_hid)
    for epoch in range(num_epochs):
        err = []
        tic = time.clock()
        for batch in range(num_batches):
            # positive phase
            v1 = dat[batch * batch_size:(batch + 1) * batch_size]
            h1 = (gpu.dot(v1, w_vh) + w_h).logistic()
            # sample hiddens
            hSampled = h1.rand() < h1
            # negative phase
            v2 = (gpu.dot(hSampled, w_vh.T) + w_v).logistic()
            h2 = (gpu.dot(v2, w_vh) + w_h).logistic()
            # update weights
            wu_vh = wu_vh * momentum + gpu.dot(v1.T, h1) - gpu.dot(v2.T, h2)
            wu_v = wu_v * momentum + v1.sum(0) - v2.sum(0)
            wu_h = wu_h * momentum + h1.sum(0) - h2.sum(0)

            w_vh += wu_vh * (epsilon / batch_size)
            w_v += wu_v * (epsilon / batch_size)
            w_h += wu_h * (epsilon / batch_size)
            # calculate reconstruction error
            err.append((v2 - v1).euclid_norm()**2 / (num_vis * batch_size))
        toc = time.clock()
        print "Mean squared error: %.4f, takes time: %d" % (numpy.mean(err),
                                                            toc - tic)
    return w_vh, w_v, w_h
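A minimal, hypothetical usage sketch for the CD-1 trainer above. It assumes the Python 2-era gnumpy stack is available and that dat is a 2-D array of examples scaled to [0, 1] (MNIST-shaped in the comment above); random data stands in here:

import numpy as np
import gnumpy as gpu

dat = gpu.garray(np.random.rand(1024, 784).astype('float32'))  # stand-in for MNIST rows
w_vh, w_v, w_h = test_gnumpy(dat, num_epochs=2)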
Exemple #53
0
    def forward(self, X):
        self.X = X
        # Num of examples
        N = X.shape[0]
        # Timespan
        T = X.shape[1]
        # Window size
        S = self.windowSize
        # Channels
        D = self.numChannels
        # Num filters
        F = self.numFilters
        Z = np.zeros((N, T - S + 1, S, D), X.dtype)
        for i in range(T - S + 1):
            Z[:, i, :, :] = X[:, i:i + S, :]
        Z = Z.reshape(N * (T - S + 1), S * D)
        if self.gpu:
            Z = gpu.as_garray(Z.astype('float32'))
            Y = gpu.dot(Z, self.W)
            Y = gpu.as_numpy_array(Y)
        else:
            Y = np.dot(Z, self.W)

        Y = Y.reshape(N, T - S + 1, F)
        self.Z = Z
        return Y
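For shape intuition: the layer slices the (N, T, D) input into overlapping windows of length S, flattens them, multiplies by the (S*D, F) weight matrix, and returns an (N, T - S + 1, F) tensor. A small CPU-only sketch with made-up sizes that mirrors the windowing loop above:

import numpy as np

N, T, D, S, F = 2, 10, 3, 4, 8
X = np.random.randn(N, T, D)
W = np.random.randn(S * D, F)

# same windowing as forward(): stack every length-S slice along the time axis
Z = np.stack([X[:, i:i + S, :] for i in range(T - S + 1)], axis=1)   # (N, T-S+1, S, D)
Y = Z.reshape(N * (T - S + 1), S * D).dot(W).reshape(N, T - S + 1, F)
print(Y.shape)   # (2, 7, 8)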
Exemple #54
0
    def pt_grad(self, params, inpts, **kwargs):
        g = gzeros(params.shape)
        m, _ = inpts.shape

        hddn = logistic(
            gpu.dot(inpts, params[: self.m_end].reshape(self.shape)) + params[self.m_end : self.m_end + self.shape[1]]
        )
        Z = gdot(hddn, params[: self.m_end].reshape(self.shape).T) + params[-self.shape[0] :]

        w = params[: self.m_end].reshape(self.shape)
        cae = gpu.sum(gpu.mean(Dsigmoid(hddn) ** 2, axis=0) * gpu.sum(w ** 2, axis=0))
        cae *= self.cae

        _, delta = self.score(Z, inpts, error=True, addon=cae)

        g[: self.m_end] = gdot(delta.T, hddn).ravel()
        g[-self.shape[0] :] = delta.sum(axis=0)

        cae_grad = gpu.mean(Dsigmoid(hddn) ** 2, axis=0) * w
        cae_grad += gdot(inpts.T, (Dsigmoid(hddn) ** 2 * (1 - 2 * hddn))) / m * gpu.sum(w ** 2, axis=0)
        g[: self.m_end] += self.cae * 2 * cae_grad.ravel()

        dsc_dha = Dsigmoid(hddn) * gdot(delta, params[: self.m_end].reshape(self.shape))

        g[: self.m_end] += gdot(inpts.T, dsc_dha).ravel()

        g[self.m_end : -self.shape[0]] = dsc_dha.sum(axis=0)
        # clean up
        del delta, hddn, Z
        return g
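The cae term above is the contractive-autoencoder penalty: for each hidden unit, the mean squared sigmoid derivative times the squared norm of that unit's weight column, summed over units. A NumPy restatement of just that penalty, assuming Dsigmoid is the sigmoid derivative written in terms of the sigmoid's output (illustrative names throughout):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def contractive_penalty(inpts, w, bias):
    hddn = sigmoid(inpts.dot(w) + bias)
    d_hddn = hddn * (1.0 - hddn)              # sigmoid derivative expressed via its output
    return np.sum(np.mean(d_hddn ** 2, axis=0) * np.sum(w ** 2, axis=0))

print(contractive_penalty(np.random.rand(16, 20), np.random.randn(20, 5), np.zeros(5)))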
Exemple #55
0
def cov(x):
    # subtract each row's mean, then form the (n - 1)-normalized covariance of the rows
    y = gpu.mean(x, axis=1)[:, None]
    x = x.as_numpy_array() - y.as_numpy_array()
    x_T = x.T.conj()
    result = gpu.dot(x, x_T)
    result = result / (x.shape[1] - 1)
    return result
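cov treats each row of x as a variable and each column as an observation, returning the (n - 1)-normalized sample covariance of the rows. A quick NumPy cross-check against np.cov (illustrative only):

import numpy as np

x = np.random.randn(5, 100)
centered = x - x.mean(axis=1, keepdims=True)
manual = centered.dot(centered.T) / (x.shape[1] - 1)
print(np.allclose(manual, np.cov(x)))   # True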
Exemple #56
0
 def check_rank(self, query_id, same_ids):
     # same_ids is the list of the ids in the same class
     v = self.index[query_id] # normalized
     if self._metric == 'angular':
         # argmax_a cossim(a, b) = argmax_a dot(a, b) / |a||b| = argmin_a -dot(a, b)
         dists = -gnumpy.dot(self.index, v)
     elif self._metric == 'euclidean':
         # argmin_a (a - b)^2 = argmin_a a^2 - 2ab + b^2 = argmin_a a^2 - 2ab
         dists = self.lengths - 2 * gnumpy.dot(self.index, v)
     else:
         assert False, "invalid metric"  # shouldn't get past the constructor!
     # this rank should start from 1, because of the self-retrieval 
     neighbor_dists = [ dists[i] for i in same_ids]
     closest_positive_dist = min(neighbor_dists)
     rank = gnumpy.sum( dists < closest_positive_dist ) 
     return int(rank)
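Both branches rely on standard identities: for unit-length vectors, ranking by -dot(a, b) matches ranking by squared euclidean distance, and the euclidean branch drops the constant b^2 term since it does not affect the ordering. A tiny sanity check with random unit vectors (all names illustrative):

import numpy as np

index = np.random.randn(50, 16)
index /= np.linalg.norm(index, axis=1, keepdims=True)   # unit-length rows, as in self.index
v = index[7]

by_dot = np.argsort(-index.dot(v))
by_dist = np.argsort(((index - v) ** 2).sum(axis=1))
print((by_dot == by_dist).all())   # True (barring floating-point ties)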
Exemple #57
0
    def apply_update(self, pos_moments, neg_moments, rbm, weight_decay, lrate):
        assert np.allclose(lrate.vbias, lrate.hbias)

        if self.count < self.params.start_after:
            rbm.sgd_update(pos_moments, neg_moments, lrate)
            return

        # base rates
        ds = gnp.concatenate([pos_moments.expect_vis - neg_moments.expect_vis,
                              pos_moments.expect_hid - neg_moments.expect_hid])
        dbias = lrate.vbias * gnp.dot(self.Lambda, ds.as_numpy_array())
        da, db = dbias[:rbm.nvis], dbias[rbm.nvis:]

        residuals = pos_moments.expect_prod - neg_moments.expect_prod + \
                    -weight_decay * rbm.weights + \
                    -self.beta[:, :, 0] * (pos_moments.expect_vis - neg_moments.expect_vis)[:, nax] + \
                    -self.beta[:, :, 1] * (pos_moments.expect_hid - neg_moments.expect_hid)[nax, :]
        lam = 1. / self.sigma_sq

        dw = lrate.weights * lam * residuals
        da -= lrate.weights * (lam * residuals * self.beta[:, :, 0]).sum(1)
        db -= lrate.weights * (lam * residuals * self.beta[:, :, 1]).sum(0)

        update = binary_rbms.Update(da, db, dw)
        rbm += update
Exemple #58
0
    def glog_l_new(self, Wmat):
        ll = 0
        n_correct = 0
        gWmat = gnp.garray(np.array(Wmat))
        for n in xrange(self.nsamples):
            #print self.Xi[n][0].shape, self.Xi[n][1].shape, self.Xi[n][2].shape, gWmat.shape
            internals = (gnp.dot(gnp.garray(self.Xi[n][0]), gnp.dot(gWmat, gnp.garray(self.Xi[n][2].T))) -
                         gnp.dot(gnp.garray(self.Xi[n][1]), gnp.dot(gWmat, gnp.garray(self.Xi[n][2].T)))).as_numpy_array()[0]

            if logistic(internals) > 0.5 and self.Y[n] == 1:
                n_correct += 1
            elif logistic(internals) < 0.5 and self.Y[n] == -1:
                n_correct += 1

            ll +=  np.log(logistic(self.Y[n] * internals))

        # return accuracy as a fraction (avoid integer division under Python 2)
        return ll, float(n_correct) / self.nsamples
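The logistic helper is not defined in this snippet; presumably it is the standard sigmoid, along the lines of:

import numpy as np

def logistic(z):
    # assumed definition of the sigmoid helper used above
    return 1.0 / (1.0 + np.exp(-z))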