def linear_decoder_run_gpu(data, numInput, numHidden):
    print "Starting Feature Abstraction..."
    num_input = numInput
    num_hidden = numHidden
    num_output = numInput
    lambda_val = 3e-3
    sparsityParam = 0.035
    beta = 5
    inputs = data
    r = gpu.sqrt(6)/gpu.sqrt(num_hidden+num_input+1)
    weights1 = (gpu.rand(num_hidden,num_input+1))*2*r-r
    weights2 = (gpu.rand(num_output,num_hidden+1))*2*r-r
    num_weights1 = (num_input+1)*num_hidden
    num_weights2 = (num_hidden+1)*num_output
    #weights1 = reshape(weights1, num_weights1)
    weights1 = weights1.reshape(num_weights1)
    #weights2 = reshape(weights2, num_weights2)
    weights2 = weights2.reshape(num_weights2)
    weights = hstack((weights1.as_numpy_array(),weights2.as_numpy_array()))
    args = (num_input,num_hidden,num_output,inputs,lambda_val,sparsityParam,beta)
    opttheta = optimize.fmin_l_bfgs_b(costfunc_gpu, weights, fprime=grad_costfunc_gpu, args=args, maxiter=400)
    weights = opttheta[0]
    weights1 = reshape(weights[0:num_weights1],(num_hidden,num_input+1))
    weights2 = reshape(weights[num_weights1:shape(weights)[0]], (num_output,num_hidden+1))
    scipy.io.savemat('learntFeaturesGPU.mat', mdict={'learntFeatures': weights1})
    return weights1
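A minimal driver sketch for the function above (not part of the original project): it assumes gnumpy is importable as gpu and that costfunc_gpu / grad_costfunc_gpu are defined in the same module; the patch file name and hidden-layer size are illustrative only.

import scipy.io
import gnumpy as gpu

# one training case per column, as expected by the cost functions above
patches = scipy.io.loadmat('stlSampledPatches.mat')['patches']
features = linear_decoder_run_gpu(gpu.garray(patches), patches.shape[0], 400)
print features.shape  # (400, patches.shape[0] + 1): hidden weights plus bias column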
Example #2
File: test.py Project: colinmorris/gmmn
def test_random_feature_mmd_loss_approximation(sigma=[1,10], scale_weight=[0.5,1],
        n_features=3):
    print 'Testing random feature MMD loss approximation error'

    n_dims = 2
    n_target = 5
    n_pred = 5 

    target = gnp.rand(n_target, n_dims)
    pred = gnp.rand(n_pred, n_dims)

    rand_mmd = ls.get_loss_from_type_name(ls.LOSS_NAME_RANDOM_FEATURE_MMDGEN,
            sigma=sigma, scale_weight=scale_weight, n_features=n_features)
    rand_mmd.load_target(target)
    print rand_mmd

    mmd = ls.get_loss_from_type_name(ls.LOSS_NAME_MMDGEN_MULTISCALE_PAIR,
            sigma=sigma, scale_weight=scale_weight)
    mmd.load_target(target)

    rand_loss, rand_grad = rand_mmd.compute_loss_and_grad(pred, compute_grad=True)
    true_loss, true_grad = mmd.compute_loss_and_grad(pred, compute_grad=True)

    test_passed = test_vec_pair(rand_grad.asarray().ravel(), 'Approximate Gradient',
            true_grad.asarray().ravel(), '       True Gradient', error_thres=1e-2)
    test_passed = test_vec_pair(np.array([rand_loss]), 'Approximate Loss',
            np.array([true_loss]), '       True Loss', error_thres=1e-2) \
            and test_passed
    print ''
    return test_passed
Example #3
File: dbn.py Project: evolu8/gdbn
 def fpropDropout(self, inputBatch, weightsToStopBefore = None ):
     """
     Perform a (possibly partial) forward pass through the
     network. Updates self.state which, on a full forward pass,
     holds the input followed by each hidden layer's activation and
     finally the net input incident on the output layer. For a full
     forward pass, we return the actual output unit activations. In
     a partial forward pass we return None.
     """
     inputBatch = inputBatch if isinstance(inputBatch, gnp.garray) else gnp.garray(inputBatch)
     if weightsToStopBefore == None:
         weightsToStopBefore = len(self.weights)
     #self.state holds everything before the output nonlinearity, including the net input to the output units
     sample = (gnp.rand(*inputBatch.shape) > self.dropouts[0])
     self.state = [inputBatch * sample]
     for i in range(min(len(self.weights) - 1, weightsToStopBefore)):
         dropoutMultiplier = 1.0/(1.0-self.dropouts[i])
         curActs = self.hidActFuncts[i].activation(gnp.dot(dropoutMultiplier*self.state[-1], self.weights[i]) + self.biases[i])
         sample = (gnp.rand(*curActs.shape) > self.dropouts[i+1])
         self.state.append(curActs * sample)
     if weightsToStopBefore >= len(self.weights):
         dropoutMultiplier = 1.0/(1.0-self.dropouts[-1])
         self.state.append(gnp.dot(dropoutMultiplier*self.state[-1], self.weights[-1]) + self.biases[-1])
         self.acts = self.outputActFunct.activation(self.state[-1])
         return self.acts
     #we didn't reach the output units
     # To return the first set of hidden activations, we would set
     # weightsToStopBefore to 1.
     return self.state[weightsToStopBefore]
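A usage sketch (hypothetical, not from the gdbn project): assuming `net` is an instance of the network class defining fpropDropout above and gnumpy is importable as gnp, a full and a partial forward pass look like this.

import gnumpy as gnp

X = gnp.rand(32, net.weights[0].shape[0])          # a minibatch of 32 input rows
out = net.fpropDropout(X)                          # full pass: output activations
h1 = net.fpropDropout(X, weightsToStopBefore=1)    # partial pass: first hidden layer state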
Example #4
    def get_drop_masks(self, mask_count, in_drop=0, hd_drop=0):
        """Get mask_count dropout masks shaped for each layer in self.layers.

        Dropout masks are computed based on drop rates self.drop_input and
        self.drop_hidden, and self.drop_undrop. Masks are scaled so that the
        sum of each mask for a given layer is the same. If in_drop == 1, dropout
        is applied to the input layer, and if hd_drop == 1, the hidden layers
        are dropped as well.
        """
        M = []
        # Generate an 'undrop' mask, which sets some masks to be dropless
        u_mask = (gp.rand(mask_count,1) < self.drop_undrop)
        for i in range(self.layer_count):
            # Set drop_rate based on layer and in_drop/hd_drop
            drop_rate = 0.0
            if ((i == 0) and (in_drop == 1)):
                drop_rate = self.drop_input
            elif (hd_drop == 1):
                drop_rate = self.drop_hidden
            # Get mask dimension for this layer
            mask_dim = self.layers[i].dim_input
            # Generate random 'bit' mask
            d_mask = (gp.rand(mask_count, mask_dim) > drop_rate)
            # Compute bootleg 'or' with the undrop mask
            mask = ((d_mask + u_mask) > 0.1)
            # Rescale mask entries to have unit mean
            scales = 1.0 / gp.mean(mask, axis=1)
            scales = scales[:,gp.newaxis]
            mask = mask * scales
            # Record the generated mask
            M.append(mask)
        return M
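A usage sketch (hypothetical names): assuming `net` is an instance of the class defining get_drop_masks above and gnumpy is importable as gp, masks are generated per minibatch and multiplied into the data.

import gnumpy as gp

masks = net.get_drop_masks(mask_count=128, in_drop=1, hd_drop=1)
X = gp.rand(128, net.layers[0].dim_input)
X_dropped = X * masks[0]   # input-layer mask; rows are rescaled to roughly unit mean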
Example #5
File: test.py Project: yujiali/pynn
def test_batch_normalization_layer():
    print 'Testing Batch Normalization layer'
    in_dim = 3
    n_cases = 5

    x = gnp.randn(n_cases, in_dim) * 2 + 3
    t = gnp.randn(n_cases, in_dim) * 2

    loss = ls.get_loss_from_type_name(ls.LOSS_NAME_SQUARED)
    loss.load_target(t)

    bn_layer = ly.BatchNormalizationLayer(in_dim)
    bn_layer.params.gamma = gnp.rand(in_dim)
    bn_layer.params.beta = gnp.rand(in_dim)

    w_0 = bn_layer.params.get_param_vec()

    y = bn_layer.forward_prop(x, is_test=False)
    _, loss_grad = loss.compute_not_weighted_loss_and_grad(y, True)
    bn_layer.backward_prop(loss_grad)

    backprop_grad = bn_layer.params.get_grad_vec()

    def f(w):
        bn_layer.params.set_param_from_vec(w)
        y = bn_layer.forward_prop(x, is_test=False)
        return loss.compute_not_weighted_loss_and_grad(y)[0]

    fdiff_grad = finite_difference_gradient(f, w_0)

    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
            backprop_grad, '  Backpropagation Gradient', eps=_BN_GRAD_CHECK_EPS,
            use_rel_err=True)
    print ''
    return test_passed
Example #6
def mlpSoftmax_test():
    numClasses = 10
    inputSize = 28 * 28
    l1Size = 100
    l2Size = 20
    lambda_softmax = 1e-4
    lambda_hidden = 8e-5
    print "Loading data..."
    inputs, labels, testData, testLabels = obtain_data()
    print shape(labels)
    print "Done."
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_L2 = l2Size * (l1Size + 1)
    num_weights_softmax = numClasses * l2Size
    r = gpu.sqrt(6)/gpu.sqrt(inputSize+l1Size+l2Size+1)
    theta_L1 = (gpu.rand(l1Size, inputSize+1))*2*r-r
    theta_L2 = (gpu.rand(l2Size, l1Size+1))*2*r-r
    theta_softmax = (gpu.rand(numClasses, l2Size))*2*r-r
    groundTruth = ground_Truth(labels,numCases)
    #theta_L1 = reshape(theta_L1, num_weights_L1)
    theta_L1 = theta_L1.reshape(num_weights_L1)
    #theta_L2 = reshape(theta_L2, num_weights_L2)
    theta_L2 = theta_L2.reshape(num_weights_L2)
    #theta_softmax = reshape(theta_softmax, num_weights_softmax)
    theta_softmax = theta_softmax.reshape(num_weights_softmax)
    theta = hstack((theta_L1.as_numpy_array(), theta_L2.as_numpy_array(), theta_softmax.as_numpy_array()))
    args = (numClasses, inputSize, l1Size, l2Size, lambda_softmax, lambda_hidden, inputs, labels, groundTruth)
    print "Starting Network Training..."
    opttheta = optimize.fmin_l_bfgs_b(mlpSoftmax_costfunc, theta, fprime=mlpSoftmax_grad, args=args, maxiter=400)
    theta = opttheta[0]
    print "Training finished."
    scipy.io.savemat('mlpSoftmax.mat', mdict={'theta': theta})
    print "Now testing prediction accuracy..."
    theta_L1 = reshape(theta[0:num_weights_L1], (l1Size, inputSize + 1))
    theta_L2 = reshape(theta[num_weights_L1:num_weights_L2+num_weights_L1], (l2Size, l1Size + 1))
    theta_softmax = reshape(theta[num_weights_L2+num_weights_L1:shape(theta)[0]], (numClasses, l2Size))
    numCasesPred = shape(testData)[1]
    testData = concatenate((ones((1,numCasesPred)), testData), axis = 0)
    hidden_sum_L1 = dot(theta_L1, testData)
    hidden_activation_L1 = 1/(1 + exp(-hidden_sum_L1))
    hidden_activation_L1 = concatenate((ones((1,numCasesPred)), hidden_activation_L1), axis=0)
    hidden_sum_L2 = dot(theta_L2, hidden_activation_L1)
    hidden_activation_L2 = 1/(1 + exp(-hidden_sum_L2))
    hidden_sum_softmax = dot(theta_softmax, hidden_activation_L2)
    hidden_sum_softmax = hidden_sum_softmax - hidden_sum_softmax.max(axis = 0)
    predictions = exp(hidden_sum_softmax)
    predictions = predictions / predictions.sum(axis = 0)
    pred = predictions.argmax(axis=0) + 1
    testLabels = squeeze(testLabels)
    accuracy = mean(pred == testLabels) * 100
    print "Accuracy: ", accuracy, "%"
    return pred, testLabels
Example #7
def multilayer_feature_learning(data, inputSize, l1Size, l2Size, sparsityParam, lambda_val, beta):
    print "Now starting feature abstraction..."
    num_input = inputSize
    num_hidden_L1 = l1Size
    num_hidden_L2 = l2Size
    num_output_L1 = inputSize
    num_output_L2 = num_hidden_L1
    sparsityParam = sparsityParam
    lambda_val = lambda_val
    beta = beta
    inputs = gpu.garray(data)
    r = gpu.sqrt(6)/gpu.sqrt(num_hidden_L1+num_input+1)
    weights1_L1 = (gpu.rand(num_hidden_L1,num_input+1))*2*r-r
    weights2_L1 = (gpu.rand(num_output_L1,num_hidden_L1+1))*2*r-r
    num_weights1_L1 = (num_input+1)*num_hidden_L1
    num_weights2_L1 = (num_hidden_L1+1)*num_output_L1
    weights1_L1 = weights1_L1.reshape(num_weights1_L1)
    weights2_L1 = weights2_L1.reshape(num_weights2_L1)
    weights_L1 = hstack((weights1_L1.as_numpy_array(),weights2_L1.as_numpy_array()))
    print "Level 1 Abstraction Starting...."
    weights_L1 = linear_decoder_run_ReLU(data, weights_L1, num_input, num_hidden_L1)
    weights1_L1 = weights_L1[0:num_weights1_L1].reshape((num_hidden_L1,num_input+1))
    weights2_L1 = weights_L1[num_weights1_L1:shape(weights_L1)[0]].reshape((num_output_L1,num_hidden_L1+1))
    scipy.io.savemat('HiggsBosonLevel1.mat', mdict={'learntFeaturesL1_1': weights1_L1, 'learntFeaturesL1_2': weights2_L1})
    L1_activation = feedforward(weights1_L1, inputs)
    del weights_L1
    del weights1_L1
    del weights2_L1
    gpu.free_reuse_cache()
    v = gpu.sqrt(6)/gpu.sqrt(num_hidden_L2+num_hidden_L1+1)
    weights1_L2 = (gpu.rand(num_hidden_L2,num_hidden_L1+1))*2*v-v
    weights2_L2 = (gpu.rand(num_output_L2,num_hidden_L2+1))*2*v-v
    num_weights1_L2 = (num_hidden_L1+1)*num_hidden_L2
    num_weights2_L2 = (num_hidden_L2+1)*num_output_L2
    weights1_L2 = weights1_L2.reshape(num_weights1_L2)
    weights2_L2 = weights2_L2.reshape(num_weights2_L2)
    weights_L2 = hstack((weights1_L2.as_numpy_array(),weights2_L2.as_numpy_array()))
    print "Level 2 Abstraction Starting...."
    weights_L2 = linear_decoder_run_ReLU(L1_activation, weights_L2, num_hidden_L1, num_hidden_L2)
    weights1_L2 = weights_L2[0:num_weights1_L2].reshape((num_hidden_L2,num_hidden_L1+1))
    weights2_L2 = weights_L2[num_weights1_L2:shape(weights_L2)[0]].reshape((num_output_L2,num_hidden_L2+1))
    scipy.io.savemat('HiggsBosonLevel2.mat', mdict={'learntFeaturesL2_1': weights1_L2,'learntFeaturesL2_2': weights2_L2})
    L2_activation = feedforward(weights1_L2, L1_activation)
    del weights_L2
    del weights1_L2
    del weights2_L2
    gpu.free_reuse_cache()
    gpu.free_reuse_cache()
    print "Abstraction completed."
    return L2_activation
Example #8
def checkGradientGPU():
    num_input = 8*8*3
    num_hidden = 10
    num_output = num_input
    lambda_val = 0.003
    sparsityParam = 0.035
    beta = 5
    data = scipy.io.loadmat('stlSampledPatches.mat')
    patches = data['patches']
    inputs = patches[:,0:10]
    r = gpu.sqrt(6)/gpu.sqrt(num_hidden+num_input+1)
    weights1 = (gpu.rand(num_hidden,num_input+1))*2*r-r
    weights2 = (gpu.rand(num_output,num_hidden+1))*2*r-r
    num_weights1 = (num_input+1)*num_hidden
    num_weights2 = (num_hidden+1)*num_output
    weights1 = weights1.reshape(num_weights1)
    weights2 = weights2.reshape(num_weights2)
    weights = hstack((weights1.as_numpy_array(),weights2.as_numpy_array()))
    args = (num_input,num_hidden,num_output,inputs,lambda_val,sparsityParam,beta)
    numgrad = zeros(size(weights))
    numgrad2 = zeros(size(weights))
    perturb = zeros(size(weights))
    e = 1e-4
    for p in range(size(weights)):
        perturb[p] = e
        minus_weights = weights - perturb
        plus_weights = weights + perturb
        loss1 = costfunc_gpuTRY(minus_weights, *args)
        lossc1 = costfunc(minus_weights, *args)
        loss2 = costfunc_gpuTRY(plus_weights, *args)
        lossc2 = costfunc(plus_weights, *args)
        numgrad[p] = (loss2 - loss1) / (2*e)
        numgrad2[p] = (lossc2 - lossc1) / (2*e)
        perturb[p] = 0
    grad = grad_costfunc_gpu(weights, *args)
    grad2 = grad_costfunc(weights, *args)
    diff = linalg.norm(numgrad-grad)/linalg.norm(numgrad+grad)
    diff2 = linalg.norm(numgrad2-grad2)/linalg.norm(numgrad2+grad2)
    diff3 = linalg.norm(numgrad-grad2)/linalg.norm(numgrad+grad2)
    diff4 = linalg.norm(numgrad2-grad)/linalg.norm(numgrad2+grad)
    diffnum = linalg.norm(numgrad2-numgrad)/linalg.norm(numgrad2+numgrad)
    diffgrad = linalg.norm(grad2-grad)/linalg.norm(grad2+grad)
    print "pure GPU difference:",diff
    print "pure CPU difference:",diff2
    print "GPU cost, CPU grad:",diff3
    print "CPU cost, GPU grad:",diff4
    print "CPU cost and GPU cost difference:",diffnum
    print "CPU grad and GPU grad difference:",diffgrad
    return "OK"
Example #9
def random_like(x):
    """Return an array of the same shape as `x` filled with random numbers from
    the interval [0, 1)."""
    if not isinstance(x, np.ndarray):
        return gp.rand(x.shape)
    else:
        return np.random.random(x.shape)
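A quick check of random_like (assuming gnumpy is importable as gp, as in the snippet above):

import numpy as np
import gnumpy as gp

a = np.zeros((2, 3))
b = gp.garray(np.zeros((2, 3)))
print random_like(a).shape, random_like(b).shape  # both (2, 3); numpy array vs garray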
Example #10
File: rbm.py Project: osdf/gpustack
    def pt_init(self, H=bernoulli, V=bernoulli, init_var=1e-2, init_bias=0., 
            rho=0.5, lmbd=0., l2=0., **kwargs):
        pt_params = gzeros(self.m_end + self.shape[1] + self.shape[0])
        if init_var is None:
            init_heur = 4*np.sqrt(6./(self.shape[0]+self.shape[1]))
            pt_params[:self.m_end] = gpu.rand(self.m_end)
            pt_params[:self.m_end] *= 2
            pt_params[:self.m_end] -= 1
            pt_params[:self.m_end] *= init_heur
        else:
            pt_params[:self.m_end] = init_var * gpu.randn(self.m_end)
        pt_params[self.m_end:] = init_bias

        self.H = H
        self.V = V 
        self.activ = match_table[H]

        self.pt_score = self.reconstruction
        self.pt_grad = self.grad_cd1

        self.l2 = l2

        self.rho = rho
        self.lmbd = lmbd
        self.rho_hat = None

        return pt_params
Example #11
File: ae.py Project: yysherlock/msae
 def getCorruptedInput(self, input):
     if self.corrputionLevel>0:
         rnd=gp.rand(self.batchsize, self.vDim)>self.corrputionLevel
         output=rnd*input
         return output
     else:
         return input
Example #12
def rbm_sample(w_vh, w_v, w_h, x, k=1, clamped=None):
    """
    Sample from RBM with k steps of Gibbs sampling
    
    w_vh: Weights between visible and hidden units (matrix of size DxH)
    w_v: Visible unit biases (column vector of size Dx1)
    w_h: Hidden unit biases (column vector of size Hx1)
    x: Input (column vector of size DxN)
    k: Number of Gibbs steps. Default is 1.
    clamped: If not None, keeps the given elements of x clamped (constant)
        while sampling
        clamped is a two-tuple that gives the start and end indices of clamped elements
    Returns hidden unit and visible unit activations (matrices of size HxN, DxN)
    """
    if clamped is not None:
        cx = x[clamped[0] : clamped[1], :]

    v = x
    for i in range(k):
        # sample hiddens
        ah = gnp.dot(w_vh.T, v) + w_h
        h = gnp.logistic(ah)
        hs = h > gnp.rand(h.shape[0], h.shape[1])

        # sample visibles
        av = gnp.dot(w_vh, hs) + w_v
        v = gnp.logistic(av)

        if clamped is not None:
            v[clamped[0] : clamped[1], :] = cx

    return h, v
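A usage sketch with a tiny, randomly initialized RBM (all sizes illustrative; assumes gnumpy as gnp and the rbm_sample function above):

import gnumpy as gnp

D, H, N = 6, 4, 10                      # visible units, hidden units, cases
w_vh = 0.1 * gnp.randn(D, H)
w_v = gnp.zeros((D, 1))
w_h = gnp.zeros((H, 1))
x = gnp.rand(D, N) > 0.5                # random binary input, one column per case
h, v = rbm_sample(w_vh, w_v, w_h, x, k=5)
print h.shape, v.shape                  # (H, N) and (D, N)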
Example #13
File: rbm.py Project: surban/ml
    def sample_vis_3d(self, n_chains, n_steps, gibbs_steps_between_samples,
                      sample_probabilities=False, init_vis=None, beta=1,
                      betas=None):
        """Obtains unbiased samples for the visible units.
        Runs n_chains Gibbs chains in parallel for 
        (n_steps*gibbs_steps_between_samples) steps.
        Grabs samples every gibbs_steps_between_samples Gibbs steps."""
        samples = gp.zeros((n_steps, n_chains, self.n_vis))
        if init_vis is None:
            vis = gp.rand((n_chains, self.n_vis)) < 0.5
        else:
            assert init_vis.shape[0] == n_chains
            vis = init_vis

        for step in range(n_steps):
            #print >>stderr, "%d / %d                 \r" % (step, n_steps),
            if betas is None:
                vis, p_vis = self.gibbs_sample(vis, gibbs_steps_between_samples,
                                               beta=beta)
            else:
                assert gibbs_steps_between_samples is None
                vis, p_vis = self.annealed_gibbs_sample(vis, betas)
            if sample_probabilities:
                sample = p_vis
            else:
                sample = vis
            samples[step, :, :] = sample
        return samples
Example #14
    def backprop(self, X, y_target) :
        # forward
        activity = []
        result = X
        for i in range(len(self.weights)):
            p = self.dropout_probability[i]
            mask = (g.rand(result.shape) >= p)
            result = result * mask
            del mask
            activity.append(result)
            w,b = self.weights[i]
            result = g.dot(result,w) + b
            result = self.activation[i](result)
            
        # backward
        gradientNodes = []
        lastGradient = self.gradient[-1](result, y_target)
        gradientNodes.append(lastGradient)
        for i in reversed(range(1,len(self.weights))):
            w,b = self.weights[i]
            lastGradient = g.dot(lastGradient, w.T) * self.gradient[i-1](activity[i])
            gradientNodes.append(lastGradient)
                
        # get gradient
        resultGradient = []
        for i in range(len(self.weights)):
            gradW = (g.dot(activity[i].T,gradientNodes[-(i+1)]) / len(X))
            assert(gradW.shape == self.weights[i][0].shape)
            gradB = (g.sum(gradientNodes[-(i+1)],axis=0) / len(X))
            assert(gradB.shape == self.weights[i][1].shape)
            resultGradient.append([gradW,gradB])

        del gradientNodes
        
        return resultGradient
Example #15
def bernoulli(data, wm, bias, sampling=False):
    """
    """
    suff = (gpu.dot(data, wm) + bias).logistic()
    if sampling:
        sample = suff > gpu.rand(suff.shape)
    else:
        sample = None
    return suff, sample
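A usage sketch for the helper above (illustrative shapes, assuming gnumpy as gpu):

import gnumpy as gpu

data = gpu.rand(5, 8)                   # 5 cases, 8 visible units
wm = 0.01 * gpu.randn(8, 3)             # weights to 3 hidden units
bias = gpu.zeros((1, 3))
probs, sample = bernoulli(data, wm, bias, sampling=True)
print probs.shape, sample.shape         # both (5, 3); sample is a binary garray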
Example #16
File: nnet.py Project: awni/awni_ml
    def initParams(self):
        sizes = [self.inputDim]+self.layerSizes+[self.outputDim]
        scales = [gp.sqrt(6)/gp.sqrt(n+m) for n,m in zip(sizes[:-1],sizes[1:])]
        self.stack = [[gp.rand(m,n)*2*s-s,gp.zeros((m,1))] \
                            for n,m,s in zip(sizes[:-1],sizes[1:],scales)]
        self.hActs = [gp.empty((s,self.mbSize)) for s in sizes]

        if self.train:
            self.deltas = [gp.empty((s,self.mbSize)) for s in sizes[1:]]
            self.grad = [[gp.empty(w.shape),gp.empty(b.shape)] for w,b in self.stack]
Example #17
File: nnet.py Project: awni/229t_project
    def initParams(self):
        # crude way of random initialization (random seed) for parameters
        import time
        self.seed = int(time.time()) % 100000
        # for tt in range(self.seed): gp.rand()
        
        sizes = [self.inputDim]+self.layerSizes+[self.outputDim]
        scales = [gp.sqrt(6)/gp.sqrt(n+m) for n,m in zip(sizes[:-1],sizes[1:])]
        self.stack = [[gp.rand(m,n)*2*s-s,gp.zeros((m,1))] \
                            for n,m,s in zip(sizes[:-1],sizes[1:],scales)]
        self.hActs = [gp.empty((s,self.mbSize)) for s in sizes]

        if self.train:
            self.deltas = [gp.empty((s,self.mbSize)) for s in sizes[1:]]
            self.grad = [[gp.empty(w.shape),gp.empty(b.shape)] for w,b in self.stack]
            for tt in range(self.seed): gp.rand()

            self.stack = [[ws[0]+.01 * gp.randn(ws[0].shape),ws[1]+.01 * gp.randn(ws[1].shape)] 
                        for ws in self.stack]
Example #18
    def initParams(self):
	"""
	Initialize parameters using 6/sqrt(fanin+fanout)
	"""
        sizes = [self.inputDim]+self.layerSizes+[self.outputDim]
        scales = [gp.sqrt(6)/gp.sqrt(n+m) for n,m in zip(sizes[:-1],sizes[1:])]
        self.stack = [[gp.rand(m,n)*2*s-s,gp.zeros((m,1))] \
                            for n,m,s in zip(sizes[:-1],sizes[1:],scales)]
        if self.temporalLayer > 0:
            rs = sizes[self.temporalLayer]
            s = gp.sqrt(6)/ rs
            # temporal layer stored at end of stack
            self.stack.append([gp.rand(rs,rs) * 2 * s - s, gp.zeros((2,1))])
        
        if self.train:
            #TODO why store all deltas?
            #self.deltas = [gp.empty((s,self.mbSize)) for s in sizes[1:]]
            #NOTE if a temporal layer is used it's already added to stack so will have a grad
            self.grad = [[gp.empty(w.shape),gp.empty(b.shape)] for w,b in self.stack]
Example #19
File: deep_net.py Project: Jun321/deepnet
 def input_to_hidden(self, set_name = 'train'):
     self.timer_logger('input_to_hidden {0}'.format(type), time.time()) 
     self.results['activations'] = []     
     if set_name == 'train':            
         self.results['activations'].append([self.batch, self.w[0], self.b[0]])   
         dropped_out = self.batch * (gpu.rand(self.current_batch_size,self.X.shape[1]) > self.dropout[0]) 
         self.results['current']  = gpu.dot(dropped_out,self.w[0])+self.b[0]
     else:                               
         self.results['current'] = gpu.dot(self.batch,self.w[0]) + self.b[0]
     self.timer_logger('input_to_hidden {0}'.format(type), time.time()) 
Example #20
File: ada_dnn.py Project: ryaninhust/Apep
    def feed_forward(self, input_batch):
        if not isinstance(input_batch, gnp.garray):
            input_batch = gnp.garray(input_batch)
        weights_to_stop = len(self.weights)
        self.state = [input_batch * (gnp.rand(*input_batch.shape) > self.dropouts[0])]

        for i in range(min(len(self.weights) -1, weights_to_stop)):
            do_factor = 1.0 / (1.0-self.dropouts[i])
            linear_outputs = gnp.dot(self.state[-1]*do_factor, self.weights[i]) + self.biases[i]
            act_outputs = self.hidden_functions[i].activate(linear_outputs)
            self.state.append(act_outputs*(gnp.rand(*act_outputs.shape) > self.dropouts[i+1]))

        if weights_to_stop >= len(self.weights):
            do_factor = 1.0 / (1.0-self.dropouts[-1])
            self.state.append(gnp.dot(self.state[-1]*do_factor, self.weights[-1]) + self.biases[-1])
            self.acts = self.output_function.activate(self.state[-1])
            return self.acts

        return self.state[weights_to_stop]
Example #21
def sample_binomial(p):
    """Samples elementwise from the binomial distribution with 
    probability p"""
    if use_debug_rng:
        r = myrand.rand(p.shape)
    else:
        r = gp.rand(p.shape)
    # n = np.random.random(p.shape)
    # n = gp.rand(p.shape)
    # r = gp.zeros(p.shape)
    return r < p
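A usage sketch (assumes gnumpy as gp and use_debug_rng set to False, as in the module above):

import gnumpy as gp

p = gp.rand(4, 4)          # elementwise probabilities
s = sample_binomial(p)     # entries are True with probability p
print s.mean(), p.mean()   # the two means should be close for large arrays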
Example #22
File: uae.py Project: Chippers255/gpustack
 def pt_init(self, score=None, init_var=1e-2, init_bias=0., **kwargs):
     pt_params = gzeros(self.size + self.m_end + self.shape[0])
     if init_var is None:
         init_heur = 4*np.sqrt(6./(self.shape[0]+self.shape[1]))
         pt_params[:self.m_end] = gpu.rand(self.m_end)
         pt_params[:self.m_end] *= 2
         pt_params[:self.m_end] -= 1
         pt_params[:self.m_end] *= init_heur
         
         pt_params[self.size:-self.shape[0]] = gpu.rand(self.m_end)
         pt_params[self.size:-self.shape[0]] *= 2
         pt_params[self.size:-self.shape[0]] -= 1
         pt_params[self.size:-self.shape[0]] *= init_heur
     else: 
         pt_params[:self.m_end] = init_var * gpu.randn(self.m_end)
         pt_params[self.size:-self.shape[0]] = init_var * gpu.randn(self.m_end)
     
     pt_params[self.m_end:self.size] = init_bias
     pt_params[-self.shape[0]:] = init_bias
     self.score = score
     return pt_params
Example #23
File: test.py Project: yujiali/pynn
def test_nonlin_invert(nonlin):
    print 'Testing inverting nonlinearity <%s>' % nonlin.get_name()

    sx, sy = 3, 4

    x = gnp.rand(sx, sy)
    y = nonlin.forward_prop(x)
    xx = nonlin.invert_output(y)

    test_passed = test_vec_pair(x.asarray().ravel(), '%15s' % 'Input',
            xx.asarray().ravel(), '%15s' % 'Inferred Input')
    print ''
    return test_passed
Example #24
File: deep_net.py Project: Jun321/deepnet
 def hidden_to_output(self, set_name = 'train'):   
     self.timer_logger('hidden_to_output {0}'.format(type), time.time()) 
     i = 0   
     for weight, bias in zip(self.w, self.b):
         if i > 0: #ignore the first weight that goes from inputs to first hidden layer
             if set_name == 'train':                            
                 self.results['activations'].insert(0, [self.activation(self.results['current'])   , weight])            
                 self.results['current'] = gpu.dot(self.results['activations'][0][0] * 
                                               (gpu.rand(self.results['activations'][0][0].shape[0],self.results['activations'][0][0].shape[1]) > self.dropout[1]), #dropout
                                                weight) + bias                    
             else:
                 self.results['current'] =  gpu.dot(self.activation(self.results['current'])* (1 - self.dropout[1]), weight) + bias
       
         i += 1
     self.timer_logger('hidden_to_output {0}'.format(type), time.time()) 
Example #25
File: rbm_vanialla.py Project: surban/ml
 def sample_vis(self, n_chains, n_steps, gibbs_steps_between_samples,
                sample_probabilities=False):
     """Obtains unbiased samples for the visible units.
     Runs n_chains Gibbs chains in parallel for n_steps.
     Grabs samples every gibbs_steps_between_samples Gibbs steps."""
     samples = gp.zeros((n_chains * n_steps, self.n_vis))
     vis = gp.rand((n_chains, self.n_vis)) < 0.5
     for step in range(n_steps):
         print >>stderr, "%d / %d                 \r" % (step, n_steps),
         vis, p_vis = self.gibbs_sample(vis, gibbs_steps_between_samples)
         if sample_probabilities:
             sample = p_vis
         else:
             sample = vis
         samples[step*n_chains : (step+1)*n_chains, :] = sample
     return samples
Example #26
def dbn_sample(ws_vh, ws_v, ws_h, x, y=None, k=1):
    """
    Sample from DBN
    
    ws_vh, ws_v, ws_h: Lists of layer weights for DBN
    x: Initial sample. This is the input to DBN. (1xD vector)
    y: Class label for the sample. This corresponds to sampling from class
        conditionals. (1-of-K coded, row vector) 
    k: Number of Gibbs steps
    Returns a sample from DBN (1xD vector)
    """
    L = len(ws_vh)

    # make a forward pass to get from input layer to visible layer of top level
    # RBM
    h_prev = x.T

    # forward (bottom-up) pass
    for l in range(L - 1):
        ah = gnp.dot(ws_vh[l].T, h_prev) + ws_h[l]
        h_prev = gnp.logistic(ah)
        h_prev = h_prev > gnp.rand(h_prev.shape[0], h_prev.shape[1])

    # if not supervised, sample from top layer RBM without clamping any of its
    # inputs
    if y is None:
        # sample from top layer RBM
        h, v = rbm_sample(ws_vh[-1], ws_v[-1], ws_h[-1], h_prev, k)
    else:
        K = y.shape[1]  # number of classes
        H = ws_vh[-1].shape[0]
        # generate a random input to top layer RBM with class label units clamped to y
        v = gnp.concatenate((y.T, h_prev))
        # sample from top layer RBM
        h, v = rbm_sample(ws_vh[-1], ws_v[-1], ws_h[-1], v, k, clamped=(0, K))
        v = v[K:H, :]

    # backward (top-down) pass
    # propagate sample from RBM back to input
    for l in range(L - 2, -1, -1):
        av = gnp.dot(ws_vh[l], v) + ws_v[l]
        v = gnp.logistic(av)

    return v.T
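A usage sketch with a random two-layer toy DBN (sizes illustrative; assumes gnumpy as gnp and the rbm_sample function from the earlier example):

import gnumpy as gnp

D, H1, H2 = 6, 4, 3
ws_vh = [0.1 * gnp.randn(D, H1), 0.1 * gnp.randn(H1, H2)]
ws_v = [gnp.zeros((D, 1)), gnp.zeros((H1, 1))]
ws_h = [gnp.zeros((H1, 1)), gnp.zeros((H2, 1))]
x = gnp.rand(1, D) > 0.5               # initial visible sample (1xD)
v = dbn_sample(ws_vh, ws_v, ws_h, x, k=10)
print v.shape                          # (1, D)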
Example #27
def test(shape=(3,4,5)):
    """
Make sure that the gnumpy conversion is exact.
"""
    gpu = theano.sandbox.cuda.basic_ops.gpu_from_host
    U = gpu(theano.tensor.ftensor3('U'))
    ii = theano.function([U], gpu(U+1))


    A = gnumpy.rand(*shape)
    A_cnd = garray_to_cudandarray(A)
    B_cnd = ii(A_cnd)
    B = cudandarray_to_garray(B_cnd)
    from numpy import array
    B2 = array(B_cnd)

    u = (A+1).asarray()
    v = B.asarray()
    w = B2
    assert abs(u-v).max() == 0
    assert abs(u-w).max() == 0
Example #28
File: layer.py Project: jakesnell/pynn
    def forward_prop(self, X, add_noise=False, compute_loss=False):
        """
        Compute the forward propagation step that maps the input data matrix X
        into the output. The loss and loss gradient are computed when
        compute_loss is set to True. Note that by default the loss is applied
        to the pre-nonlinearity activation rather than the final output,
        unless loss_after_nonlin is set to True.
        """
        if self.params.dropout > 0 and add_noise:
            self.dropout_mask = gnp.rand(X.shape[0], X.shape[1]) > self.params.dropout
            self.inputs = X * self.dropout_mask
        else:
            self.inputs = X
        self.noise_added = add_noise

        if not self.use_batch_normalization:
            self.activation = self.inputs.dot(self.params.W) + self.params.b
            self.output = self.nonlin.forward_prop(self.activation)

            if self.sparsity_weight > 0:
                self._sparsity_current = self._sparsity_smoothing * self.output.mean(axis=0) \
                        + (1 - self._sparsity_smoothing) * self._sparsity_current
                self._sparsity_objective = (- self.sparsity * gnp.log(self._sparsity_current + 1e-20) \
                        - (1 - self.sparsity) * gnp.log(1 - self._sparsity_current + 1e-20)).sum() * self.sparsity_weight
        else:
            self.activation = self.inputs.dot(self.params.W)
            self.bn_output = self.bn_layer.forward_prop(self.activation)
            self.output = self.nonlin.forward_prop(self.bn_output)

        if compute_loss and self.loss is not None:
            if self.loss_after_nonlin:
                self.loss_value, self.loss_grad = self.loss.compute_loss_and_grad(
                        self.output, compute_grad=True)
            else:
                self.loss_value, self.loss_grad = self.loss.compute_loss_and_grad(
                        self.activation if not self.use_batch_normalization else self.bn_output, compute_grad=True)
            self.loss_computed = True
        
        return self.output
Example #29
def test(shape=(3, 4, 5)):
    """
    Make sure that the gnumpy conversion is exact from garray to
    CudaNdarray back to garray.
    """
    gpu = theano.sandbox.cuda.basic_ops.gpu_from_host
    U = gpu(theano.tensor.ftensor3('U'))
    ii = theano.function([U], gpu(U + 1))

    A = gnumpy.rand(*shape)
    A_cnd = garray_to_cudandarray(A)
    assert A_cnd.shape == A.shape
    # dtype always float32
    # garray don't have strides
    B_cnd = ii(A_cnd)
    B = cudandarray_to_garray(B_cnd)
    assert A_cnd.shape == A.shape

    u = (A + 1).asarray()
    v = B.asarray()
    w = np.array(B_cnd)
    assert (u == v).all()
    assert (u == w).all()
Example #30
 def feedforward(self, X, return_on_gpu=False):
     """Perform feedforward through this layer.
     """
     # Cleanup debris from any previous feedforward
     self._cleanup()
     # Record (a pointer to) the passed input
     self.X = gp.garray(X)
     # Generate and apply a dropout mask to the input
     if (self.drop_rate > 1e-4):
         drop_mask = self.drop_scale * \
                 (gp.rand((self.X.shape[0], self.X.shape[1])) > self.drop_rate)
     else:
         drop_mask = gp.ones((self.X.shape[0], self.X.shape[1]))
     self.dYdX = drop_mask
     if (self.fuzz_scale > 1e-4):
         fuzz_bump = (self.fuzz_scale / self.drop_scale) * \
                 gp.randn((self.X.shape[0], self.X.shape[1]))
         self.Y = drop_mask * (self.X + fuzz_bump)
     else:
         self.Y = drop_mask * self.X
     if not return_on_gpu:
         self.Y = gp.as_numpy_array(self.Y)
     return self.Y
Example #31
    def train(self):
        config = self.config

        # convert t into a matrix in 1-of-K representation if it is a vector
        t = self.train_data.T
        T_matrix = self.output.act_type.label_vec_to_mat(t, self.train_data.K)

        layer_config = LayerConfig()
        layer_config.learn_rate = config.learn_rate
        layer_config.momentum = config.init_momentum
        layer_config.weight_decay = config.weight_decay

        nnstore = NNStore()
        nnstore.init_from_net(self)

        best_net = NNStore()
        best_net.init_from_net(self)

        train_acc, val_acc, test_acc = self.display_training_info(
                -1, 
                self._compute_loss(
                    self.train_data.X, T_matrix, config.minibatch_size),
                0)
        acc_rec = np.zeros((config.num_epochs / config.epoch_to_display + 1, 4))
        acc_rec[0, 0] = 0
        acc_rec[0, 1] = train_acc
        if config.is_val:
            acc_rec[0, 2] = val_acc
        if config.is_test:
            acc_rec[0, 3] = test_acc

        t_start = time.time()

        best_acc = val_acc
        if self.config.is_test:
            best_test_acc = test_acc
        best_epoch = -1

        for epoch in range(0, config.num_epochs):
            gnp.free_reuse_cache()

            # decrease learning rate over time
            layer_config.learn_rate = config.learn_rate / \
                    (epoch / config.lr_drop_rate + 1)

            # TODO [dirty] special for Lnsvm
            if isinstance(self.output.act_type, act.LnsvmVariantOutput):
                #self.output.act_type.n = 3.0 - (3.0 - 0.5) / 50 * epoch
                self.output.act_type.n = 0.5
                if self.output.act_type.n < 0.5:
                    self.output.act_type.n = 0.5 

                if (epoch + 1) % config.epoch_to_display == 0:
                    print 'n %.4f' % self.output.act_type.n,
            
            if epoch >= config.switch_epoch:
                layer_config.momentum = config.final_momentum

            # shuffle the dataset 
            idx = np.random.permutation(self.num_total_cases)
            #idx = np.arange(self.num_total_cases)
            train_X = self.train_data.X[idx]
            train_T = T_matrix[idx]

            if config.input_noise > 0:
                train_X = train_X * (gnp.rand(train_X.shape) > config.input_noise)
                # train_X = train_X + gnp.randn(train_X.shape) * config.input_noise

            loss = 0

            for batch in range(0, self.num_minibatches):
                i_start = batch * config.minibatch_size
                if not batch == self.num_minibatches - 1:
                    i_end = i_start + config.minibatch_size
                else:
                    i_end = self.num_total_cases

                X = train_X[i_start:i_end]
                T = train_T[i_start:i_end]

                # forward pass
                self._forward(X)

                # compute loss
                loss += self.output.loss(T)

                if self.output.Y.isnan().any():
                    import ipdb
                    ipdb.set_trace()
                    print 'batch #%d <-- nan' % batch

                # backprop
                dLdXabove = self.output.backprop(layer_config)
                for i in range(self.num_layers-1, -1, -1):
                    dLdXabove = self.layer[i].backprop(dLdXabove, layer_config)

            # statistics
            avg_loss = 1.0 * loss / self.num_total_cases

            if (epoch + 1) % config.epoch_to_display == 0:
                train_acc, val_acc, test_acc = self.display_training_info(
                        epoch, avg_loss, time.time() - t_start)

                if val_acc == None:
                    val_acc = train_acc

                if (config.show_task_loss and val_acc < best_acc) or \
                        (not config.show_task_loss and val_acc > best_acc):
                    best_acc = val_acc
                    best_net.update_from_net(self)
                    if config.is_test:
                        best_test_acc = test_acc
                    best_epoch = epoch
                t_start = time.time()
                acc_rec[(epoch + 1) / config.epoch_to_display, 0] = epoch + 1
                acc_rec[(epoch + 1) / config.epoch_to_display, 1] = train_acc
                if config.is_val:
                    acc_rec[(epoch + 1) / config.epoch_to_display, 2] = val_acc
                if config.is_test:
                    acc_rec[(epoch + 1) / config.epoch_to_display, 3] = test_acc

            if (epoch + 1) % config.epoch_to_save == 0:
                nnstore.update_from_net(self)
                nnstore.write(config.output_dir + '/m' + str(epoch + 1) + '.pdata')


        print '----------------------------------------------------------------'

        if config.show_task_loss:
            s = 'loss'
        else:
            s = 'acc'
        
        if config.is_val:
            print 'Best val_%s %.4f' % (s, best_acc),
        else:
            print 'Best train_%s %.4f' % (s, best_acc),

        if config.is_test:
            print '--> test_%s %.4f' % (s, best_test_acc),
        print 'at epoch %d' % (best_epoch + 1)

        if config.is_output:
            f = open('%s/acc_rec.pdata' % config.output_dir, 'w')
            pickle.dump(acc_rec, f, -1)
            f.close()

            self.write_config('%s/cfg.txt' % config.output_dir)

            # save the best net
            fname = config.output_dir + '/best_net.pdata'
            print 'Saving the best model to ' + fname
            best_net.write(fname)

        if config.is_test:
            return (best_acc, best_test_acc)
        else:
            return (best_acc)
Example #32
 def threshold_mask_soft(x, k, dropout=None):
     b = k * gp.std(x, axis=1)[:, gp.newaxis]
     std_matrix = gp.dot(b, gp.ones((1, x.shape[1])))
     if dropout == None: return (x > std_matrix)
     return (x > std_matrix) * (gp.rand(x.shape) > (1 - dropout))
Example #33
 def mask(x, dropout=1):
     return (gp.rand(x.shape) > (1 - dropout))
Example #34
 def rand(shape, dtype):
     return gp.rand(*shape)
Example #35
"""
This code can only work if gnumpy and theano are initialized on the
same gpu as theano.
"""
from six.moves import reduce

try:
    import gnumpy
    import cudamat
    gnumpy_available = True

    ___const_garray = gnumpy.rand(1)

    import theano.sandbox.cuda as cuda
    if cuda.cuda_available == False:
        raise ImportError('Optional theano package cuda disabled')

    def cudandarray_to_garray(x, copyif=False):
        """ take a CudaNdarray and return a gnumpy.garray object.

        :type x: CudaNdarray
        :param x: The array to transform to gnumpy.garray.
        :type copyif: bool
        :param copyif: If False, raise an error if x is not c contiguous.
                       If it is c contiguous, we return a GPUArray that shares
                       the same memory region as x.
                       If True, copy x if it is not c contiguous, so the return
                       won't share the same memory region. If c contiguous, the
                       return will share the same memory region.

                       We need to do this as GPUArray doesn't fully support strided memory.
Example #36
 def rand_binary(shape, dtype):
     return gp.rand(*shape) > .5
Example #37
 def rand(*shape):
     return gp.rand(*shape)
Example #38
def multilayer_feature_learning(data, inputSize, l1Size, l2Size, l3Size,
                                sparsityParam, lambda_val, beta):
    print "Now starting feature abstraction..."
    num_input = inputSize
    num_hidden_L1 = l1Size
    num_hidden_L2 = l2Size
    num_hidden_L3 = l3Size
    num_output_L1 = inputSize
    num_output_L2 = num_hidden_L1
    num_output_L3 = num_hidden_L2
    sparsityParam = sparsityParam
    lambda_val = lambda_val
    beta = beta
    inputs = gpu.garray(data)
    r = gpu.sqrt(6) / gpu.sqrt(num_hidden_L1 + num_input + 1)
    weights1_L1 = (gpu.rand(num_hidden_L1, num_input + 1)) * 2 * r - r
    weights2_L1 = (gpu.rand(num_output_L1, num_hidden_L1 + 1)) * 2 * r - r
    num_weights1_L1 = (num_input + 1) * num_hidden_L1
    num_weights2_L1 = (num_hidden_L1 + 1) * num_output_L1
    #weights1_L1 = reshape(weights1_L1, num_weights1_L1)
    weights1_L1 = weights1_L1.reshape(num_weights1_L1)
    #weights2_L1 = reshape(weights2_L1, num_weights2_L1)
    weights2_L1 = weights2_L1.reshape(num_weights2_L1)
    weights_L1 = hstack(
        (weights1_L1.as_numpy_array(), weights2_L1.as_numpy_array()))
    print "Level 1 Abstraction Starting...."
    args = (num_input, num_hidden_L1, num_output_L1, inputs, lambda_val,
            sparsityParam, beta)
    opttheta_L1 = optimize.fmin_l_bfgs_b(costfunc_gpu,
                                         weights_L1,
                                         fprime=grad_costfunc_gpu,
                                         args=args,
                                         maxiter=400)
    weights_L1 = gpu.garray(opttheta_L1[0])
    #weights1_L1 = reshape(weights_L1[0:num_weights1_L1],(num_hidden_L1,num_input+1))
    weights1_L1 = weights_L1[0:num_weights1_L1].reshape(
        (num_hidden_L1, num_input + 1))
    #weights2_L1 = reshape(weights_L1[num_weights1_L1:shape(weights_L1)[0]],(num_hidden_L2,num_hidden_L1+1))
    weights2_L1 = weights_L1[num_weights1_L1:shape(weights_L1)[0]].reshape(
        (num_output_L1, num_hidden_L1 + 1))
    scipy.io.savemat('MINSTLevel1.mat',
                     mdict={
                         'learntFeaturesL1_1': weights1_L1.as_numpy_array(),
                         'learntFeaturesL1_2': weights2_L1.as_numpy_array()
                     })
    L1_activation = feedforward(weights1_L1, inputs)
    del weights_L1
    del weights1_L1
    del weights2_L1
    gpu.free_reuse_cache()
    v = gpu.sqrt(6) / gpu.sqrt(num_hidden_L2 + num_hidden_L1 + 1)
    weights1_L2 = (gpu.rand(num_hidden_L2, num_hidden_L1 + 1)) * 2 * v - v
    weights2_L2 = (gpu.rand(num_output_L2, num_hidden_L2 + 1)) * 2 * v - v
    num_weights1_L2 = (num_hidden_L1 + 1) * num_hidden_L2
    num_weights2_L2 = (num_hidden_L2 + 1) * num_output_L2
    #weights1_L2 = reshape(weights1_L2, num_weights1_L2)
    weights1_L2 = weights1_L2.reshape(num_weights1_L2)
    #weights2_L2 = reshape(weights2_L2, num_weights2_L2)
    weights2_L2 = weights2_L2.reshape(num_weights2_L2)
    weights_L2 = hstack(
        (weights1_L2.as_numpy_array(), weights2_L2.as_numpy_array()))
    args = (num_hidden_L1, num_hidden_L2, num_output_L2, L1_activation,
            lambda_val, sparsityParam, beta)
    print "Level 2 Abstraction Starting...."
    opttheta_L2 = optimize.fmin_l_bfgs_b(costfunc_gpu,
                                         weights_L2,
                                         fprime=grad_costfunc_gpu,
                                         args=args,
                                         maxiter=400)
    weights_L2 = gpu.garray(opttheta_L2[0])
    #weights1_L2 = reshape(weights_L2[0:num_weights1_L2],(num_hidden_L2,num_hidden_L1+1))
    weights1_L2 = weights_L2[0:num_weights1_L2].reshape(
        (num_hidden_L2, num_hidden_L1 + 1))
    weights2_L2 = weights_L2[num_weights1_L2:shape(weights_L2)[0]].reshape(
        (num_output_L2, num_hidden_L2 + 1))
    scipy.io.savemat('MINSTLevel2.mat',
                     mdict={
                         'learntFeaturesL2_1': weights1_L2.as_numpy_array(),
                         'learntFeaturesL2_2': weights2_L2.as_numpy_array()
                     })
    L2_activation = feedforward(weights1_L2, L1_activation)
    del weights_L2
    del weights1_L2
    del weights2_L2
    gpu.free_reuse_cache()
    u = gpu.sqrt(6) / gpu.sqrt(num_hidden_L3 + num_hidden_L2 + 1)
    weights1_L3 = (gpu.rand(num_hidden_L3, num_hidden_L2 + 1)) * 2 * u - u
    weights2_L3 = (gpu.rand(num_output_L3, num_hidden_L3 + 1)) * 2 * u - u
    num_weights1_L3 = (num_hidden_L2 + 1) * num_hidden_L3
    num_weights2_L3 = (num_hidden_L3 + 1) * num_output_L3
    #weights1_L3 = reshape(weights1_L3, num_weights1_L3)
    weights1_L3 = weights1_L3.reshape(num_weights1_L3)
    #weights2_L3 = reshape(weights2_L3, num_weights2_L3)
    weights2_L3 = weights2_L3.reshape(num_weights2_L3)
    weights_L3 = hstack(
        (weights1_L3.as_numpy_array(), weights2_L3.as_numpy_array()))
    args = (num_hidden_L2, num_hidden_L3, num_output_L3, L2_activation,
            lambda_val, sparsityParam, beta)
    print "Level 3 Abstraction Starting...."
    opttheta_L3 = optimize.fmin_l_bfgs_b(costfunc_gpu,
                                         weights_L3,
                                         fprime=grad_costfunc_gpu,
                                         args=args,
                                         maxiter=400)
    weights_L3 = gpu.garray(opttheta_L3[0])
    #weights1_L3 = reshape(weights_L3[0:num_weights1_L3],(num_hidden_L3,num_hidden_L2+1))
    weights1_L3 = weights_L3[0:num_weights1_L3].reshape(
        (num_hidden_L3, num_hidden_L2 + 1))
    weights2_L3 = weights_L3[num_weights1_L3:shape(weights_L3)[0]].reshape(
        (num_output_L3, num_hidden_L3 + 1))
    scipy.io.savemat('MINSTLevel3.mat',
                     mdict={
                         'learntFeaturesL3_1': weights1_L3.as_numpy_array(),
                         'learntFeaturesL3_2': weights2_L3.as_numpy_array()
                     })
    L3_activation = feedforward(weights1_L3, L2_activation)
    del weights_L3
    del weights1_L3
    del weights2_L3
    gpu.free_reuse_cache()
    print "Abstraction completed."
    return L3_activation
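A hypothetical call for the three-level version above, assuming MNIST-style 28x28 inputs arranged one case per column and that feedforward / costfunc_gpu / grad_costfunc_gpu are available in the same module; all layer sizes and hyperparameters are illustrative.

import numpy as np

data = np.random.rand(28 * 28, 1000)   # 1000 unlabelled cases, one per column
L3_features = multilayer_feature_learning(data, 28 * 28, 500, 200, 100,
                                          sparsityParam=0.05, lambda_val=3e-3,
                                          beta=3)
print L3_features.shape                # e.g. (100, 1000): top-level activations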
Example #39
                self.hActs[i].T)
            self.grad[i][1] = (1. / self.mbSize) * gp.sum(
                self.deltas[i], axis=1).reshape(-1, 1)

        return cost, self.grad

    def updateParams(self, scale, update):

        self.stack = [[ws[0] + scale * wsDelta[0], ws[1] + scale * wsDelta[1]]
                      for ws, wsDelta in zip(self.stack, update)]


if __name__ == '__main__':
    inputDim = 5
    outputDim = 10
    layerSizes = [100, 100, 300]
    mbSize = 5

    # fake data
    data = gp.rand(inputDim, mbSize)
    import random
    labels = [random.randint(0, 9)] * mbSize

    # make nnet
    nn = NNet(inputDim, outputDim, layerSizes, mbSize, train=True)
    nn.initParams()

    # run
    cost, grad = nn.costAndGrad(data, labels)
    print cost
Example #40
 def sampleStates(self, acts):
     return gnp.rand(*acts.shape) <= acts
Example #41
File: rbm_utils.py Project: xy1234552/fang
def sample_units(inputs):
    return gnp.rand(inputs.shape) < sigmoid(inputs)