def apply_nn_test(P, net, nCxt, outLayer, feat_dir, FeatList, outFeatDir, useDropout):
    "Sends the test features for feedforward, and applies the PCA calculated from training files"

    fdir = ''
    inFeatList = open(feat_dir + FeatList).readlines()

    for fname in inFeatList:
        if fname == '\n':
            continue
        elif fname.rstrip()[-1] == ':':
            fdir = fname.rstrip()[:-1]+'/'
            print fdir
            continue
        elif fname.rstrip()[-3:]=='txt':
            utt = np.loadtxt(feat_dir + fdir + fname[:-1])
            # if not useDropout:
            outputs = gpu.as_numpy_array(net.fprop_xf(utt, outLayer))
            # else:
            # outputs = gpu.as_numpy_array(net.fpropDropout(utt, outLayer))
                
            assert(outputs.shape[1] == 40)
            outputs = np.dot(outputs, P)
            # if i/1*1 == i:
                # gpu.free_reuse_cache()

            outfile = htkmfc.HTKFeat_write(feat_dir + outFeatDir + 'test_feat/' + fdir[-9:] + fname[:-5], outputs.shape[1], htkmfc.USER)
            outfile.writeall(outputs)
            del outfile
            del outputs
            gpu.free_reuse_cache()
Example #2
def gradcheck(epsilon=1e-4):

    import dataLoader as dl
    import random 

    loader = dl.DataLoader('/scail/group/deeplearning/speech/awni/kaldi-stanford/kaldi-trunk/egs/timit/s5/exp/nn_train/',41*23,41*23)
    nn = NNet(41*23,41*23,[1024])
    nn.initParams()

    data_dict,alis,keys,sizes = loader.loadDataFileDict(1)

    k = random.sample(keys,1)[0]

    data = gp.garray(data_dict[k])
    labels = np.array(alis[k],dtype=np.int32)

    cost,grad,_ = nn.costAndGrad(data,labels)
    print data.shape
    print labels.shape

    while True:
        m,n = nn.stack[1][0].shape
        msample,nsample = random.randint(0,m-1),random.randint(0,n-1)
        nn.stack[1][0][msample,nsample] += epsilon

        cost2,grad,_ = nn.costAndGrad(data,labels)
    
        nn.stack[1][0][msample,nsample] -= epsilon

        finite_diff = (cost2 - cost) / epsilon
        print "Analytic %.6f -- Finite %.6f"%(grad[1][0][msample,nsample],finite_diff)
            
        # Clear gp mem
        gp.free_reuse_cache()
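The gradcheck routine above rests on the one-sided finite-difference approximation f'(theta) ~ (f(theta + eps) - f(theta)) / eps for a single randomly chosen weight. A minimal self-contained sketch of that check, using plain numpy and a hypothetical quadratic cost in place of the NNet class used above:

import numpy as np

def finite_diff_check(cost_and_grad, theta, epsilon=1e-4, n_checks=5):
    """Compare analytic gradients against one-sided finite differences."""
    cost, grad = cost_and_grad(theta)
    for _ in range(n_checks):
        i = np.random.randint(theta.size)
        theta_eps = theta.copy()
        theta_eps[i] += epsilon           # perturb one coordinate
        cost_eps, _ = cost_and_grad(theta_eps)
        finite = (cost_eps - cost) / epsilon
        print("Analytic %.6f -- Finite %.6f" % (grad[i], finite))

# illustrative cost f(theta) = 0.5*||theta||^2, whose gradient is theta itself
finite_diff_check(lambda t: (0.5 * np.dot(t, t), t), np.random.randn(10))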
def linear_decoder_run(data, numInput, numHidden):
    print "Starting Feature Abstraction..."
    gpu.free_reuse_cache()
    num_input = numInput
    num_hidden = numHidden
    num_output = numInput
    lambda_val = 3e-4
    sparsityParam = 0.035
    beta = 5
    inputs = data
    r = sqrt(6) / sqrt(num_hidden + num_input + 1)
    weights1 = (random.rand(num_hidden, num_input + 1)) * 2 * r - r
    weights2 = (random.rand(num_output, num_hidden + 1)) * 2 * r - r
    num_weights1 = (num_input + 1) * num_hidden
    num_weights2 = (num_hidden + 1) * num_output
    weights1 = reshape(weights1, num_weights1)
    weights2 = reshape(weights2, num_weights2)
    weights = hstack((weights1, weights2))
    args = (num_input, num_hidden, num_output, inputs, lambda_val,
            sparsityParam, beta)
    opttheta = optimize.fmin_l_bfgs_b(costfunc,
                                      weights,
                                      fprime=grad_costfunc,
                                      args=args,
                                      maxiter=500)
    weights = opttheta[0]
    weights1 = reshape(weights[0:num_weights1], (num_hidden, num_input + 1))
    weights2 = reshape(weights[num_weights1:shape(weights)[0]],
                       (num_output, num_hidden + 1))
    scipy.io.savemat('learntFeatures.mat', mdict={'learntFeatures': weights1})
    return weights1
def mlpSingleOutput1Layer_costfunc(x, *args):
    inputSize, l1Size, lambda_hidden, inputs, targets = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    inputs = gpu.garray(inputs)
    targets = gpu.garray(targets)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1], (l1Size, inputSize + 1)))
    theta_output = gpu.garray(reshape(x[num_weights_L1:shape(x)[0]], (1, l1Size+1)))
    inputs = gpu.concatenate((gpu.ones((1,numCases)), inputs), axis = 0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_activation_L1 = gpu.concatenate((gpu.ones((1,numCases)), hidden_activation_L1), axis = 0)
    #hidden_activation_L1 = hidden_activation_L1 * dropout_prob
    hidden_sum_output = gpu.dot(theta_output, hidden_activation_L1)
    outputs = hidden_sum_output.logistic()
    output_target_diff = (outputs - targets)**2
    regularized_penalty_output = theta_output[:,1:shape(theta_output)[1]]
    regularized_penalty_output = regularized_penalty_output * regularized_penalty_output
    regularized_penalty_L1 = theta_L1[:,1:shape(theta_L1)[1]]
    regularized_penalty_L1 = regularized_penalty_L1 * regularized_penalty_L1
    cost = gpu.sum(output_target_diff)/(2*numCases) + 0.5 * lambda_hidden*(gpu.sum(regularized_penalty_L1)+gpu.sum(regularized_penalty_output))
    print 'Multilayer Perceptron Cost:', cost
    del inputs
    del theta_L1
    del hidden_sum_L1
    del hidden_activation_L1
    del regularized_penalty_output
    del regularized_penalty_L1
    gpu.free_reuse_cache()
    return cost
Example #5
    def costAndGradSFO(self, stack, datums):
        """
        Wrapper function used for SFO optimizer.
        """
        N = len(datums)
        cost = 0.
        grad = [[gp.zeros(w.shape), gp.zeros(b.shape)]
                for w, b in self.stack]

        # Push stack to device
        self.stack = [[gp.garray(w), gp.garray(b)]
                      for w, b in stack]

        for datum in datums:
            data = gp.garray(self.data_dict[datum])
            labels = np.array(self.alis[datum], dtype=np.int32)
            costSingle, gradSingle, skip = self.costAndGrad(data, labels)
            if skip:
                print "LOGGING SKIP"  # TODO what to do here?
                N -= 1
                continue
            grad = [[gs[0] + g[0], gs[1] + g[1]]
                    for gs, g in zip(gradSingle, grad)]
            cost += costSingle

            # Have to force GC on the GPU... gnumpy lameness
            gp.free_reuse_cache()

        # Pull gradient from device
        grad = [[((1. / N) * gw).as_numpy_array(), ((1. / N) * gb).as_numpy_array()]
                for gw, gb in grad]
        cost *= 1. / N

        return cost, grad
Example #6
File: util.py Project: surban/ml
def print_total_garray_size():
    gp.free_reuse_cache()
    tot = 0
    for obj in gc.get_objects():
        if isinstance(obj, gp.garray):
            tot += obj.size
    print "Total GPU memory used by garrays:        %.1f MB" % (tot / 1e6)
    print "Total GPU memory use reported by gnumpy: %.1f MB" % (gp.memory_in_use() / 1e6)
def grad_costfunc_gpu_ReLU(x, *args):
    num_input, num_hidden, num_output, inputs, lambda_val, sparsityParam, beta = args
    num_weights1 = (num_input + 1) * num_hidden
    num_weights2 = (num_hidden + 1) * num_output
    x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    weights1 = x[0:num_weights1].reshape((num_hidden, num_input + 1))
    weights2 = x[num_weights1:shape(x)[0]].reshape(
        (num_output, num_hidden + 1))
    nData = shape(inputs)[1]
    data = gpu.concatenate((gpu.ones((1, nData)), inputs), axis=0)
    hidden_sum = gpu.dot(weights1, data)
    #hidden_activation = gpu.log(1+hidden_sum.exp())
    relu_mask_hidden1 = gpu.ones(shape(hidden_sum)) * (hidden_sum > 0)
    hidden_activation = hidden_sum * relu_mask_hidden1
    #hidden_derivative = hidden_sum.logistic()
    hidden_derivative = relu_mask_hidden1
    hidden_activation = gpu.concatenate((gpu.ones(
        (1, nData)), hidden_activation),
                                        axis=0)
    hidden_derivative = gpu.concatenate((gpu.ones(
        (1, nData)), hidden_derivative),
                                        axis=0)
    outputs = gpu.dot(weights2, hidden_activation)
    weights1_grad = gpu.zeros(shape(weights1))
    weights2_grad = gpu.zeros(shape(weights2))
    p = outputs - inputs
    weights2_grad += gpu.dot(
        p, gpu.garray(transpose(hidden_activation.as_numpy_array())))
    q_temp = gpu.dot(gpu.garray(transpose(weights2.as_numpy_array())), p)
    #q = multiply(multiply(q_temp,hidden_activation),(1-hidden_activation))
    q = q_temp * hidden_derivative
    delta2 = gpu.dot(q, gpu.garray(transpose(data.as_numpy_array())))
    weights1_grad += delta2[1:shape(delta2)[0], :]
    weights1_grad = weights1_grad / nData
    weights2_grad = weights2_grad / nData
    weights1_grad[:, 1:shape(weights1_grad)[1]] = weights1_grad[:, 1:shape(
        weights1_grad)[1]] + weights1[:, 1:shape(weights1)[1]] * lambda_val
    weights2_grad[:, 1:shape(weights2_grad)[1]] = weights2_grad[:, 1:shape(
        weights2_grad)[1]] + weights2[:, 1:shape(weights2)[1]] * lambda_val
    #weights1_grad = reshape(weights1_grad, num_weights1)
    weights1_grad = weights1_grad.reshape(num_weights1)
    #weights2_grad = reshape(weights2_grad, num_weights2)
    weights2_grad = weights2_grad.reshape(num_weights2)
    del x
    del inputs
    del data
    del p
    del q_temp
    del q
    del delta2
    del hidden_sum
    del hidden_activation
    del weights1
    del weights2
    gpu.free_reuse_cache()
    return hstack(
        (weights1_grad.as_numpy_array(), weights2_grad.as_numpy_array()))
    def run_through_network(self, xs):
        hid = xs
        for n_rbm in self.network:
            vis = gp.garray(hid)
            g_rbm = RBM(n_rbm.n_visible, n_rbm.n_hidden, n_rbm.vistype,
                        n_rbm.hidtype, n_rbm.W, n_rbm.hbias, n_rbm.vbias)
            hid = self.get_activation(g_rbm, hid)
            gp.free_reuse_cache()
        return hid
Example #9
    def run_through_network(self, data):
        hid = data
        for n_rbm in self.network:
            #vis = gp.garray(hid)
            g_rbm = RBM(n_rbm.n_visible, n_rbm.n_hidden, n_rbm.vistype,
                        n_rbm.hidtype, n_rbm.W, n_rbm.hbias, n_rbm.vbias, stream=self.stream)
            # feed each layer the previous layer's activations, not the raw input
            hid = self.get_activation(g_rbm, hid)
            gp.free_reuse_cache()
        return hid
def mlpSoftmax1Layer_grad(x, *args):
    numClasses, inputSize, l1Size, lambda_softmax, lambda_hidden, inputs, groundTruth = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_softmax = numClasses * l1Size
    inputs = gpu.garray(inputs)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1],
                                  (l1Size, inputSize + 1)))
    theta_softmax = gpu.garray(
        reshape(x[num_weights_L1:shape(x)[0]], (numClasses, l1Size)))
    theta_L1_grad = gpu.zeros(shape(theta_L1))
    inputs = gpu.concatenate((gpu.ones((1, numCases)), inputs), axis=0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    #hidden_activation_L1 = gpu.log(1+hidden_sum_L1.exp())
    #hidden_derivative_L1 = hidden_sum_L1.logistic()
    relu_mask_hidden1 = gpu.ones(shape(hidden_sum_L1)) * (hidden_sum_L1 > 0)
    hidden_activation_L1 = hidden_sum_L1 * relu_mask_hidden1
    #hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_derivative_L1 = relu_mask_hidden1
    hidden_sum_softmax_imd = gpu.dot(theta_softmax, hidden_activation_L1)
    hidden_sum_softmax = hidden_sum_softmax_imd - hidden_sum_softmax_imd.max(
        axis=0)
    predictions = hidden_sum_softmax.exp()
    predictions = predictions / gpu.sum(predictions, axis=0)
    softmax_imd = groundTruth - predictions
    theta_softmax_grad = -1 * gpu.dot(
        softmax_imd,
        gpu.garray(transpose(hidden_activation_L1.as_numpy_array()))
    ) / numCases + lambda_softmax * theta_softmax
    deltaOut = -softmax_imd
    delta_L1_imd = gpu.dot(
        gpu.garray(transpose(theta_softmax.as_numpy_array())), deltaOut)
    delta_L1_imd2 = delta_L1_imd * hidden_derivative_L1
    #delta_L1_imd2 = (delta_L1_imd*hidden_activation_L1)*(1-hidden_activation_L1)
    delta_L1 = gpu.dot(delta_L1_imd2,
                       gpu.garray(transpose(inputs.as_numpy_array())))
    theta_L1_grad += delta_L1
    theta_L1_grad = theta_L1_grad / numCases
    theta_L1_grad[:, 1:shape(theta_L1_grad)[1]] = theta_L1_grad[:, 1:shape(
        theta_L1_grad)[1]] + theta_L1[:, 1:shape(theta_L1)[1]] * lambda_hidden
    theta_L1_grad = reshape(theta_L1_grad.as_numpy_array(), num_weights_L1)
    theta_softmax_grad = reshape(theta_softmax_grad.as_numpy_array(),
                                 num_weights_softmax)
    del inputs
    del theta_L1
    del theta_softmax
    del hidden_sum_L1
    del hidden_activation_L1
    del hidden_sum_softmax
    del predictions
    del softmax_imd
    del deltaOut
    del delta_L1_imd
    del delta_L1_imd2
    del delta_L1
    gpu.free_reuse_cache()
    return hstack((theta_L1_grad, theta_softmax_grad))
Example #11
def getJacobian(nn_input, frames):
    bfs =[i for i in nn_input.cfg.bfs()]
    bfs.reverse()
   
    Z_new = nn_input._layers['MIL_pool'].Z
    _,delta = nn_input._layers['MIL_pool']._ComputeParamGradient(Z_new * nn_input._layers['MIL_pool'].A.reshape(frames,17,1,1))
    for l in bfs[3:-1]:
        gnp.free_reuse_cache()
        delta = nn_input._layers[l].BackProp(gnp.relu(delta))
    return delta.as_numpy_array()
Example #12
def mlpSoftmax_costfunc(x, *args):
    numClasses, inputSize, l1Size, l2Size, lambda_softmax, lambda_hidden, inputs, labels, groundTruth = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_L2 = l2Size * (l1Size + 1)
    #x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1],
                                  (l1Size, inputSize + 1)))
    #theta_L1 = x[0:num_weights_L1].reshape((l1Size, inputSize + 1))
    #print numClasses, l2Size
    theta_L2 = gpu.garray(
        reshape(x[num_weights_L1:num_weights_L2 + num_weights_L1],
                (l2Size, l1Size + 1)))
    #theta_L2 = x[num_weights_L1:num_weights_L2+num_weights_L1].reshape((l2Size, l1Size + 1))
    theta_softmax = gpu.garray(
        reshape(x[num_weights_L2 + num_weights_L1:shape(x)[0]],
                (numClasses, l2Size)))
    #theta_softmax = x[num_weights_L2+num_weights_L1:shape(x)[0]].reshape((numClasses, l2Size))
    inputs = gpu.concatenate((gpu.ones((1, numCases)), inputs), axis=0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_activation_L1 = gpu.concatenate((gpu.ones(
        (1, numCases)), hidden_activation_L1),
                                           axis=0)
    hidden_sum_L2 = gpu.dot(theta_L2, hidden_activation_L1)
    hidden_activation_L2 = hidden_sum_L2.logistic()
    hidden_sum_softmax = gpu.dot(theta_softmax, hidden_activation_L2)
    hidden_sum_softmax = hidden_sum_softmax - hidden_sum_softmax.max(axis=0)
    predictions = hidden_sum_softmax.exp()
    predictions = predictions / gpu.sum(predictions, axis=0)
    temp = groundTruth * gpu.log(predictions)
    regularized_penalty_L1 = theta_L1[:, 1:shape(theta_L1)[1]]
    regularized_penalty_L2 = theta_L2[:, 1:shape(theta_L2)[1]]
    regularized_penalty_L1 = regularized_penalty_L1 * regularized_penalty_L1
    regularized_penalty_L2 = regularized_penalty_L2 * regularized_penalty_L2
    cost = -1 * gpu.sum(temp) / numCases + 0.5 * lambda_hidden * (
        gpu.sum(regularized_penalty_L1) + gpu.sum(regularized_penalty_L2)
    ) + 0.5 * lambda_softmax * gpu.sum(theta_softmax * theta_softmax)
    print 'Multilayer Softmax Cost:', cost
    del inputs
    del theta_L1
    del theta_L2
    del theta_softmax
    del hidden_sum_L1
    del hidden_activation_L1
    del hidden_sum_L2
    del hidden_activation_L2
    del hidden_sum_softmax
    del predictions
    del temp
    del regularized_penalty_L1
    del regularized_penalty_L2
    gpu.free_reuse_cache()
    return cost
Example #13
def getJacobian(nn_input, frames):
    bfs = [i for i in nn_input.cfg.bfs()]
    bfs.reverse()

    Z_new = nn_input._layers['MIL_pool'].Z
    _, delta = nn_input._layers['MIL_pool']._ComputeParamGradient(
        Z_new * nn_input._layers['MIL_pool'].A.reshape(frames, 17, 1, 1))
    for l in bfs[3:-1]:
        gnp.free_reuse_cache()
        delta = nn_input._layers[l].BackProp(gnp.relu(delta))
    return delta.as_numpy_array()
Example #14
def getJacobian_per_class(nn_input, loc, frames):
    bfs =[i for i in nn_input.cfg.bfs()]
    bfs.reverse()
  
    Z_new = nn_input._layers['MIL_pool'].Z
    Z_inv = gnp.zeros(Z_new.shape)
    Z_inv[:,loc,:,:] = Z_new[:,loc,:,:] * nn_input._layers['MIL_pool'].A[:,loc].reshape(frames,1,1)
    _,delta = nn_input._layers['MIL_pool']._ComputeParamGradient(Z_inv)
    for l in bfs[3:-1]:
        gnp.free_reuse_cache()
        delta = nn_input._layers[l].BackProp(gnp.relu(delta))
    return delta.as_numpy_array()
    def run_down_through_network(self, xs):
        hid = xs

        copy = self.network[:]
        copy.reverse()

        for n_rbm in copy:
            vis = gp.garray(hid)
            g_rbm = RBM(n_rbm.n_visible, n_rbm.n_hidden, n_rbm.vistype,
                        n_rbm.hidtype, n_rbm.W, n_rbm.hbias, n_rbm.vbias)
            hid = self.get_visible(g_rbm, hid)
            gp.free_reuse_cache()
        return hid
Example #16
def getJacobian_per_class(nn_input, loc, frames):
    bfs = [i for i in nn_input.cfg.bfs()]
    bfs.reverse()

    Z_new = nn_input._layers['MIL_pool'].Z
    Z_inv = gnp.zeros(Z_new.shape)
    Z_inv[:, loc, :, :] = Z_new[:, loc, :, :] * nn_input._layers[
        'MIL_pool'].A[:, loc].reshape(frames, 1, 1)
    _, delta = nn_input._layers['MIL_pool']._ComputeParamGradient(Z_inv)
    for l in bfs[3:-1]:
        gnp.free_reuse_cache()
        delta = nn_input._layers[l].BackProp(gnp.relu(delta))
    return delta.as_numpy_array()
Example #17
def grad_costfunc_gpu(x, *args):
    num_input,num_hidden,num_output,inputs,lambda_val,sparsityParam,beta = args
    num_weights1 = (num_input+1)*num_hidden
    num_weights2 = (num_hidden+1)*num_output
    x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    weights1 = x[0:num_weights1].reshape((num_hidden,num_input+1))
    weights2 = x[num_weights1:shape(x)[0]].reshape((num_output,num_hidden+1))
    nData = shape(inputs)[1]
    data = gpu.concatenate((gpu.ones((1,nData)), inputs), axis = 0)
    hidden_sum = gpu.dot(weights1, data)
    hidden_activation = hidden_sum.logistic()
    p_avg = gpu.sum(hidden_activation,axis=1)/nData
    grad_sparse = -1*sparsityParam/p_avg.as_numpy_array() + (1-sparsityParam)/(1-p_avg.as_numpy_array())
    grad_sparse = append(0,grad_sparse)
    grad_sparse = tile(grad_sparse, (nData, 1))
    grad_sparse = gpu.garray(transpose(grad_sparse))
    hidden_activation = gpu.concatenate((gpu.ones((1,nData)), hidden_activation), axis = 0)
    outputs = gpu.dot(weights2, hidden_activation)
    weights1_grad = gpu.zeros(shape(weights1))
    weights2_grad = gpu.zeros(shape(weights2))
    p = outputs-inputs
    weights2_grad += gpu.dot(p, gpu.garray(transpose(hidden_activation.as_numpy_array())))
    q_temp = gpu.dot(gpu.garray(transpose(weights2.as_numpy_array())),p) + beta*grad_sparse
    #q = multiply(multiply(q_temp,hidden_activation),(1-hidden_activation))
    q = (q_temp*hidden_activation)*(1-hidden_activation)
    delta2 = gpu.dot(q, gpu.garray(transpose(data.as_numpy_array())))
    weights1_grad += delta2[1:shape(delta2)[0], :]
    weights1_grad = weights1_grad/nData
    weights2_grad = weights2_grad/nData
    weights1_grad[:,1:shape(weights1_grad)[1]] = weights1_grad[:,1:shape(weights1_grad)[1]] + weights1[:,1:shape(weights1)[1]] * lambda_val
    weights2_grad[:,1:shape(weights2_grad)[1]] = weights2_grad[:,1:shape(weights2_grad)[1]] + weights2[:,1:shape(weights2)[1]] * lambda_val
    #weights1_grad = reshape(weights1_grad, num_weights1)
    weights1_grad = weights1_grad.reshape(num_weights1)
    #weights2_grad = reshape(weights2_grad, num_weights2)
    weights2_grad = weights2_grad.reshape(num_weights2)
    del x
    del inputs
    del data
    del grad_sparse
    del p
    del q_temp
    del q
    del delta2
    del hidden_sum
    del hidden_activation
    del weights1
    del weights2
    gpu.free_reuse_cache()
    return hstack((weights1_grad.as_numpy_array(),weights2_grad.as_numpy_array()))
def costfunc_gpu(x, *args):
    num_input, num_hidden, num_output, inputs, noNoiseData, lambda_val, sparsityParam, beta = args
    num_weights1 = (num_input + 1) * num_hidden
    x = gpu.garray(x)
    #    randomNoise = random.random_sample(shape(inputs))
    #    criteriaTable = randomNoise > 0.32
    #    inputs = inputs * criteriaTable
    inputs = gpu.garray(inputs)
    noNoiseData = gpu.garray(noNoiseData)
    #weights1 = gpu.garray(reshape(x[0:num_weights1],(num_hidden,num_input+1)))
    weights1 = x[0:num_weights1].reshape((num_hidden, num_input + 1))
    #weights2 = gpu.garray(reshape(x[num_weights1:shape(x)[0]], (num_output,num_hidden+1)))
    weights2 = x[num_weights1:shape(x)[0]].reshape(
        (num_output, num_hidden + 1))
    nData = shape(inputs)[1]
    data = gpu.concatenate((gpu.ones((1, nData)), inputs), axis=0)
    hidden_sum = gpu.dot(weights1, data)
    hidden_activation = hidden_sum.logistic()
    p_avg = gpu.sum(hidden_activation, axis=1) / nData
    hidden_activation = gpu.concatenate((gpu.ones(
        (1, nData)), hidden_activation),
                                        axis=0)
    output = gpu.dot(weights2, hidden_activation)
    regularized_penalty1 = weights1[:, 1:shape(weights1)[1]]
    regularized_penalty2 = weights2[:, 1:shape(weights2)[1]]
    regularized_penalty1 = regularized_penalty1 * regularized_penalty1
    regularized_penalty2 = regularized_penalty2 * regularized_penalty2
    output_target_diff = (output - noNoiseData) * (output - noNoiseData)
    KL = gpu.sum(sparsityParam * gpu.log(sparsityParam / p_avg) +
                 (1 - sparsityParam) * gpu.log((1 - sparsityParam) /
                                               (1 - p_avg)))
    cost = gpu.sum(output_target_diff) / (2 * nData) + 0.5 * lambda_val * (
        gpu.sum(regularized_penalty1) +
        gpu.sum(regularized_penalty2)) + beta * KL
    print 'GPU Linear Denoising Decoder Cost: ', cost
    del x
    del inputs
    del noNoiseData
    del data
    del hidden_sum
    del hidden_activation
    del p_avg
    del output
    del regularized_penalty1
    del regularized_penalty2
    del weights1
    del weights2
    del output_target_diff
    gpu.free_reuse_cache()
    return cost
def mlpSingleOutput1Layer_grad(x, *args):
    inputSize, l1Size, lambda_hidden, inputs, targets = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_output = 1 * (l1Size + 1)
    inputs = gpu.garray(inputs)
    targets = gpu.garray(targets)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1],
                                  (l1Size, inputSize + 1)))
    theta_output = gpu.garray(
        reshape(x[num_weights_L1:shape(x)[0]], (1, l1Size + 1)))
    inputs = gpu.concatenate((gpu.ones((1, numCases)), inputs), axis=0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_activation_L1 = gpu.concatenate((gpu.ones(
        (1, numCases)), hidden_activation_L1),
                                           axis=0)
    #hidden_activation_L1 = hidden_activation_L1 * dropout_prob
    hidden_sum_output = gpu.dot(theta_output, hidden_activation_L1)
    outputs = hidden_sum_output.logistic()
    theta_L1_grad = gpu.zeros(shape(theta_L1))
    theta_output_grad = gpu.zeros(shape(theta_output))
    a = (outputs - targets) * outputs * (1 - outputs)
    theta_output_grad += gpu.dot(
        a, gpu.garray(transpose(hidden_activation_L1.as_numpy_array())))
    b_temp = gpu.dot(gpu.garray(transpose(theta_output.as_numpy_array())), a)
    b = (b_temp * hidden_activation_L1) * (1 - hidden_activation_L1)
    delta2 = gpu.dot(b, gpu.garray(transpose(inputs.as_numpy_array())))
    theta_L1_grad += delta2[1:shape(delta2)[0], :]
    theta_L1_grad = theta_L1_grad / numCases
    theta_output_grad = theta_output_grad / numCases
    theta_output_grad[:, 1:shape(
        theta_output_grad)[1]] = theta_output_grad[:, 1:shape(
            theta_output_grad
        )[1]] + theta_output[:, 1:shape(theta_output)[1]] * lambda_hidden
    theta_L1_grad[:, 1:shape(theta_L1_grad)[1]] = theta_L1_grad[:, 1:shape(
        theta_L1_grad)[1]] + theta_L1[:, 1:shape(theta_L1)[1]] * lambda_hidden
    theta_output_grad = reshape(theta_output_grad.as_numpy_array(),
                                num_weights_output)
    theta_L1_grad = reshape(theta_L1_grad.as_numpy_array(), num_weights_L1)
    del inputs
    del theta_L1
    del hidden_sum_L1
    del hidden_activation_L1
    gpu.free_reuse_cache()
    return hstack((theta_L1_grad, theta_output_grad))
def mlpSoftmax_costfunc(x, *args):
    numClasses, inputSize, l1Size, l2Size, lambda_softmax, lambda_hidden, inputs, labels, groundTruth = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_L2 = l2Size * (l1Size + 1)
    #x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1], (l1Size, inputSize + 1)))
    #theta_L1 = x[0:num_weights_L1].reshape((l1Size, inputSize + 1))
    #print numClasses, l2Size
    theta_L2 = gpu.garray(reshape(x[num_weights_L1:num_weights_L2+num_weights_L1], (l2Size, l1Size + 1)))
    #theta_L2 = x[num_weights_L1:num_weights_L2+num_weights_L1].reshape((l2Size, l1Size + 1))
    theta_softmax = gpu.garray(reshape(x[num_weights_L2+num_weights_L1:shape(x)[0]], (numClasses, l2Size)))
    #theta_softmax = x[num_weights_L2+num_weights_L1:shape(x)[0]].reshape((numClasses, l2Size))
    inputs = gpu.concatenate((gpu.ones((1,numCases)), inputs), axis = 0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_activation_L1 = gpu.concatenate((gpu.ones((1,numCases)), hidden_activation_L1), axis=0)
    hidden_sum_L2 = gpu.dot(theta_L2, hidden_activation_L1)
    hidden_activation_L2 = hidden_sum_L2.logistic()
    hidden_sum_softmax = gpu.dot(theta_softmax, hidden_activation_L2)
    hidden_sum_softmax = hidden_sum_softmax - hidden_sum_softmax.max(axis = 0)
    predictions = hidden_sum_softmax.exp()
    predictions = predictions / gpu.sum(predictions,axis = 0)
    temp = groundTruth*gpu.log(predictions)
    regularized_penalty_L1 = theta_L1[:,1:shape(theta_L1)[1]]
    regularized_penalty_L2 = theta_L2[:,1:shape(theta_L2)[1]]
    regularized_penalty_L1 = regularized_penalty_L1 * regularized_penalty_L1
    regularized_penalty_L2 = regularized_penalty_L2 * regularized_penalty_L2
    cost = -1*gpu.sum(temp)/numCases + 0.5 * lambda_hidden*(gpu.sum(regularized_penalty_L1) + gpu.sum(regularized_penalty_L2)) + 0.5 * lambda_softmax * gpu.sum(theta_softmax*theta_softmax)
    print 'Multilayer Softmax Cost:', cost
    del inputs
    del theta_L1
    del theta_L2
    del theta_softmax
    del hidden_sum_L1
    del hidden_activation_L1
    del hidden_sum_L2
    del hidden_activation_L2
    del hidden_sum_softmax
    del predictions
    del temp
    del regularized_penalty_L1
    del regularized_penalty_L2
    gpu.free_reuse_cache()
    return cost
def running(inputData, l1Size, l2Size):
    inputs = inputData
    inputSize = 30
    sparsityParam = 0.05
    lambda_val = 7e-5
    lambda_valFineTune = 1e-5
    beta = 3
    multilayer_feature_learning(inputs, inputSize, l1Size, l2Size, sparsityParam, lambda_val, beta)
    weights1 = scipy.io.loadmat('HiggsBosonLevel1.mat')['learntFeaturesL1_1']
    weights2 = scipy.io.loadmat('HiggsBosonLevel2.mat')['learntFeaturesL2_1']
    weights3 = scipy.io.loadmat('HiggsBosonLevel2.mat')['learntFeaturesL2_2']
    weights4 = scipy.io.loadmat('HiggsBosonLevel1.mat')['learntFeaturesL1_2']
    gpu.free_reuse_cache()
    print "Fine Tuning the abstraction network..."
    num_input = inputSize
    num_hidden1 = l1Size
    num_hidden2 = l2Size
    num_hidden3 = l1Size
    num_output = num_input
    num_weights1 = (num_input+1)*num_hidden1
    num_weights2 = (num_hidden1+1)*num_hidden2
    num_weights3 = (num_hidden2+1)*num_hidden3
    num_weights4 = (num_hidden3+1)*num_output
    weights1 = reshape(weights1, num_weights1)
    weights2 = reshape(weights2, num_weights2)
    weights3 = reshape(weights3, num_weights3)
    weights4 = reshape(weights4, num_weights4)
    weights = hstack((weights1,weights2,weights3,weights4))
    print "Fine Tuning Starting..."
    stepSize = 200000
    for i in range(int(shape(inputs)[1]/stepSize)):
        print "Batch:", i
        data = inputs[:,i*stepSize:(i+1)*stepSize]
        args = (num_input, num_hidden1, num_hidden2, num_hidden3, lambda_valFineTune, data)
        opttheta = optimize.fmin_l_bfgs_b(fine_tuning_cost_gpu, weights, fprime=fine_tuning_grad_gpu, args=args, maxiter=200)
        weights = opttheta[0]
        del opttheta
        gpu.free_reuse_cache()
    weights1 = reshape(weights[0:num_weights1], (l1Size, inputSize + 1))
    weights2 = reshape(weights[num_weights1:num_weights1+num_weights2], (l2Size, l1Size + 1))
    weights3 = reshape(weights[num_weights1+num_weights2:num_weights1+num_weights2+num_weights3], (num_hidden3, l2Size + 1))
    weights4 = reshape(weights[num_weights1+num_weights2+num_weights3:shape(weights)[0]], (inputSize, num_hidden3 + 1))
    scipy.io.savemat('HiggsBoson_FineTuned_features2Layers.mat', mdict={'learntFeaturesL1': weights1,'learntFeaturesL2': weights2, 'learntFeaturesL3': weights3, 'learntFeaturesL4': weights4})
    return weights1, weights2
Example #22
    def run_through_network(self, data, net=None):
        '''
        Gets the output of the top layer of the network given input data on the 
        bottom.

        args:   
            array data: the input data
            obj net:    the network to use, default is self.network
        returns:
            array hid:  the activation of the top layer 
        '''
        if net is None:
            net = self.network
        hid = data
        for layer in net:
            vis = gp.garray(hid)
            hid = self.get_activation(layer, vis)
            gp.free_reuse_cache()
        return hid
Example #23
    def _compute_loss(self, X, T, batch_size=1000):
        n_total = X.shape[0]
        n_batches = n_total / batch_size
        loss = 0
        for i in range(n_batches):
            gnp.free_reuse_cache()
            i_start = i * batch_size
            if i < n_batches - 1:
                i_end = i_start + batch_size
            else:
                i_end = n_total

            Xbatch = X[i_start:i_end]
            Tbatch = T[i_start:i_end]

            self._forward(Xbatch)
            loss += self.output.loss(Tbatch)
        
        return loss / n_total
def costfunc_gpu_ReLU(x, *args):
    num_input, num_hidden, num_output, inputs, lambda_val, sparsityParam, beta = args
    num_weights1 = (num_input + 1) * num_hidden
    x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    #weights1 = gpu.garray(reshape(x[0:num_weights1],(num_hidden,num_input+1)))
    weights1 = x[0:num_weights1].reshape((num_hidden, num_input + 1))
    #weights2 = gpu.garray(reshape(x[num_weights1:shape(x)[0]], (num_output,num_hidden+1)))
    weights2 = x[num_weights1:shape(x)[0]].reshape(
        (num_output, num_hidden + 1))
    nData = shape(inputs)[1]
    data = gpu.concatenate((gpu.ones((1, nData)), inputs), axis=0)
    hidden_sum = gpu.dot(weights1, data)
    #hidden_activation = gpu.log(1+hidden_sum.exp())
    relu_mask_hidden1 = gpu.ones(shape(hidden_sum)) * (hidden_sum > 0)
    hidden_activation = hidden_sum * relu_mask_hidden1
    hidden_activation = gpu.concatenate((gpu.ones(
        (1, nData)), hidden_activation),
                                        axis=0)
    output = gpu.dot(weights2, hidden_activation)
    regularized_penalty1 = weights1[:, 1:shape(weights1)[1]]
    regularized_penalty2 = weights2[:, 1:shape(weights2)[1]]
    regularized_penalty1 = regularized_penalty1 * regularized_penalty1
    regularized_penalty2 = regularized_penalty2 * regularized_penalty2
    output_target_diff = (output - inputs) * (output - inputs)
    cost = gpu.sum(output_target_diff) / (2 * nData) + 0.5 * lambda_val * (
        gpu.sum(regularized_penalty1) + gpu.sum(regularized_penalty2))
    print 'GPU ReLU Linear Decoder Cost: ', cost
    del x
    del inputs
    del data
    del hidden_sum
    del hidden_activation
    del output
    del regularized_penalty1
    del regularized_penalty2
    del weights1
    del weights2
    del output_target_diff
    gpu.free_reuse_cache()
    return cost
def mlpSoftmax1Layer_costfunc(x, *args):
    numClasses, inputSize, l1Size, lambda_softmax, lambda_hidden, inputs, groundTruth = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    inputs = gpu.garray(inputs)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1],
                                  (l1Size, inputSize + 1)))
    theta_softmax = gpu.garray(
        reshape(x[num_weights_L1:shape(x)[0]], (numClasses, l1Size)))
    inputs = gpu.concatenate((gpu.ones((1, numCases)), inputs), axis=0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    #hidden_activation_L1 = gpu.log(1+hidden_sum_L1.exp())
    relu_mask_hidden1 = gpu.ones(shape(hidden_sum_L1)) * (hidden_sum_L1 > 0)
    hidden_activation_L1 = hidden_sum_L1 * relu_mask_hidden1
    #hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_sum_softmax = gpu.dot(theta_softmax, hidden_activation_L1)
    hidden_sum_softmax = hidden_sum_softmax - hidden_sum_softmax.max(axis=0)
    predictions = hidden_sum_softmax.exp()
    predictions = predictions / gpu.sum(predictions, axis=0)
    temp = groundTruth * gpu.log(predictions)
    temp = temp.as_numpy_array()
    temp[temp == -inf] = -200.0
    temp = nan_to_num(temp)
    regularized_penalty_L1 = theta_L1[:, 1:shape(theta_L1)[1]]
    regularized_penalty_L1 = regularized_penalty_L1 * regularized_penalty_L1
    cost = -1 * sum(temp) / numCases + 0.5 * lambda_hidden * (
        gpu.sum(regularized_penalty_L1)) + 0.5 * lambda_softmax * gpu.sum(
            theta_softmax * theta_softmax)
    print 'Multilayer Softmax Cost:', cost
    del inputs
    del theta_L1
    del theta_softmax
    del hidden_sum_L1
    del hidden_activation_L1
    del hidden_sum_softmax
    del predictions
    del temp
    del regularized_penalty_L1
    gpu.free_reuse_cache()
    return cost
Example #26
def test(opts):
    import editDistance as ed

    print "Testing model %s" % opts.inFile

    phone_map = get_phone_map_swbd()

    with open(opts.inFile, 'r') as fid:
        old_opts = pickle.load(fid)
        _ = pickle.load(fid)
        _ = pickle.load(fid)
        loader = dl.DataLoader(opts.dataDir, old_opts.rawDim,
                               old_opts.inputDim)
        if 'layers' not in dir(old_opts):
            old_opts.layers = [old_opts.layerSize] * old_opts.numLayers
        nn = nnet.NNet(old_opts.inputDim,
                       old_opts.outputDim,
                       old_opts.layers,
                       train=False)
        nn.initParams()
        nn.fromFile(fid)

    totdist = numphones = 0

    fid = open('hyp.txt', 'w')
    for i in range(1, opts.numFiles + 1):
        data_dict, alis, keys, sizes = loader.loadDataFileDict(i)
        for k in keys:
            gp.free_reuse_cache()
            hyp = nn.costAndGrad(data_dict[k])
            hyp = [phone_map[h] for h in hyp]
            ref = [phone_map[int(r)] for r in alis[k]]
            dist, ins, dels, subs, corr = ed.edit_distance(ref, hyp)
            print "Distance %d/%d" % (dist, len(ref))
            fid.write(k + ' ' + ' '.join(hyp) + '\n')
            totdist += dist
            numphones += len(alis[k])

    fid.close()
    print "PER : %f" % (100 * totdist / float(numphones))
Example #27
def train_kmeans_layer(X, in_shape, K, ksize, n_patches_per_image, prep_type=None, pad_h=0, pad_w=0, repeat=1, **kwargs):
    train_data = get_random_patches(X, in_shape, ksize, n_patches_per_image, pad_h=pad_h, pad_w=pad_w)
    if prep_type is not None:
        prep = pp.choose_preprocessor_by_name(prep_type)
        prep.train(train_data)
        train_data = prep.process(train_data)
    else:
        prep = None

    C_best = None
    loss_best = None

    for i_repeat in xrange(repeat):
        print '*** repeat #%d ***' % (i_repeat + 1)
        gnp.free_reuse_cache()
        C, _, loss = clust.kmeans(train_data, K, **kwargs) 
        if loss_best is None or loss < loss_best:
            loss_best = loss
            C_best = C

    print '>>> best loss: %.2f' % loss_best
    return KMeansModel(C_best, kwargs.get('dist', 'euclidean'), in_shape.c, ksize, prep)
Example #28
def apply_nn_train_prePCA(net, nCxt, outLayer, feat_dir, FeatList, outFeatDir, Nframes, useDropout):
    """Sends the training features for feedforward and collects the output in a matrix X for performing PCA"""

    fdir = ''
    dim = net.weights[-2].shape[1]
    X = np.zeros((Nframes,dim))

    inFeatList = open(feat_dir + FeatList).readlines()
    
    fro = 0
    to = 0

    for fname in inFeatList:
        if fname.rstrip()[-1] == ':':
            fdir = fname.rstrip()[:-1]+'/'
            continue
        elif fname.rstrip()[-3:]=='txt':
            utt = np.loadtxt(feat_dir + fdir + fname.rstrip())
            # if not useDropout:
            outputs = gpu.as_numpy_array(net.fprop_xf(utt, outLayer))
            # else:
            #     outputs = gpu.as_numpy_array(net.fpropDropout(utt, outLayer))
            assert(outputs.shape[1] == 40)
            fro = to
            to = fro + outputs.shape[0]
            # if X == None:
            # 	X = outputs
            # else:
            X[fro:to] = outputs
            # X = np.concatenate((X,outputs))
            # if i/1*1 == i:
            #   gpu.free_reuse_cache()
            # np.savetxt(feat_dir + outFeatDir + 'train_16k_prePCA/' + fname, gpu.as_numpy_array(outputs))
            np.save(feat_dir + outFeatDir + 'train_prePCA/' + fname[:-5], outputs)
        del outputs
        gpu.free_reuse_cache()

    #End of for
    return X
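apply_nn_train_prePCA collects every training frame's network output into X so that a PCA projection P can be estimated and later applied in apply_nn_test via np.dot(outputs, P). One way such a P could be obtained from X is sketched below in plain numpy; the centering and column conventions of the original pipeline may differ, so treat this as illustrative only.

import numpy as np

def estimate_pca(X, n_components=40):
    """Return a (dim x n_components) projection matrix from row-major data X."""
    X_centered = X - X.mean(axis=0)
    cov = np.dot(X_centered.T, X_centered) / X_centered.shape[0]
    eigvals, eigvecs = np.linalg.eigh(cov)            # eigenvalues in ascending order
    order = np.argsort(eigvals)[::-1][:n_components]  # keep the top components
    return eigvecs[:, order]                          # columns = principal directions

# P = estimate_pca(X); projected = np.dot(outputs, P)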
def costfunc_gpu(x, *args):
    num_input,num_hidden,num_output,inputs,lambda_val,sparsityParam,beta = args
    num_weights1 = (num_input+1)*num_hidden
    x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    #weights1 = gpu.garray(reshape(x[0:num_weights1],(num_hidden,num_input+1)))
    weights1 = x[0:num_weights1].reshape((num_hidden,num_input+1))
    #weights2 = gpu.garray(reshape(x[num_weights1:shape(x)[0]], (num_output,num_hidden+1)))
    weights2 = x[num_weights1:shape(x)[0]].reshape((num_output,num_hidden+1))
    nData = shape(inputs)[1]
    data = gpu.concatenate((gpu.ones((1,nData)), inputs), axis = 0)
    hidden_sum = gpu.dot(weights1, data)
    hidden_activation = hidden_sum.logistic()
    p_avg = gpu.sum(hidden_activation,axis=1)/nData
    hidden_activation = gpu.concatenate((gpu.ones((1,nData)), hidden_activation), axis = 0)
    output = gpu.dot(weights2, hidden_activation)
    regularized_penalty1 = weights1[:,1:shape(weights1)[1]]
    regularized_penalty2 = weights2[:,1:shape(weights2)[1]]
    regularized_penalty1 = regularized_penalty1 * regularized_penalty1
    regularized_penalty2 = regularized_penalty2 * regularized_penalty2
    output_target_diff = (output - inputs)*(output - inputs)
    KL = gpu.sum(sparsityParam*gpu.log(sparsityParam/p_avg) + (1-sparsityParam)*gpu.log((1-sparsityParam)/(1-p_avg)))
    cost = gpu.sum(output_target_diff)/(2*nData) + 0.5 * lambda_val * (gpu.sum(regularized_penalty1) + gpu.sum(regularized_penalty2)) + beta*KL
    print 'Linear Decoder Cost: ', cost
    del x
    del inputs
    del data
    del hidden_sum
    del hidden_activation
    del p_avg
    del output
    del regularized_penalty1
    del regularized_penalty2
    del weights1
    del weights2
    del output_target_diff 
    gpu.free_reuse_cache()
    return cost
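The sparsity term in costfunc_gpu is the KL divergence between the target activation rho (sparsityParam) and each hidden unit's mean activation rho_hat (p_avg), summed over units: rho*log(rho/rho_hat) + (1-rho)*log((1-rho)/(1-rho_hat)). A small numpy check of that same quantity, assuming a hypothetical vector of mean activations:

import numpy as np

def kl_sparsity(rho, rho_hat):
    """Sum of Bernoulli KL(rho || rho_hat) over hidden units, as in the sparse-autoencoder cost."""
    return np.sum(rho * np.log(rho / rho_hat) +
                  (1 - rho) * np.log((1 - rho) / (1 - rho_hat)))

p_avg = np.array([0.03, 0.08, 0.12])   # hypothetical mean hidden activations
print("KL penalty: %.4f" % kl_sparsity(0.05, p_avg))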
Example #30
    def train(self, data, epochs, eta):
        '''
        Trains the deep net one RBM at a time

        args:
            array data:         the training data (a gnumpy.array)
            list[int] epochs:   the number of training epochs for each RBM
            float eta:          the learning rate
        '''
        layers = []
        vis = data
        for i in range(len(self.layer_sizes) - 1):
            print "Pretraining RBM %d, vis=%d, hid=%d" % (
                i + 1, self.layer_sizes[i], self.layer_sizes[i + 1])
            g_rbm = RBM(self.layer_sizes[i], self.layer_sizes[i + 1],
                        self.layer_types[i], self.layer_types[i + 1])
            g_rbm.train(vis, epochs[i], eta)
            hid = self.get_activation(g_rbm, vis)
            vis = hid
            n_rbm = Holder(g_rbm)
            layers.append(n_rbm)
            gp.free_reuse_cache()
        self.network = layers
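The train method above performs greedy layer-wise pretraining: each RBM is trained on the activations produced by the stack below it, and those activations become the visible data for the next layer. A minimal numpy sketch of that data flow, with a made-up propagate_up standing in for RBM training and get_activation:

import numpy as np

def propagate_up(vis, W, hbias):
    """Logistic hidden activations for one trained layer (illustrative only)."""
    return 1.0 / (1.0 + np.exp(-(np.dot(vis, W) + hbias)))

layer_sizes = [784, 512, 256]
vis = np.random.rand(100, layer_sizes[0])        # stand-in for training data
for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:]):
    W = 0.01 * np.random.randn(n_in, n_out)      # a real RBM would be trained on `vis` here
    hbias = np.zeros(n_out)
    vis = propagate_up(vis, W, hbias)            # hidden activations feed the next layer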
Example #31
    def train(self, data, epochs, eta):
        '''
        Trains the deep net one RBM at a time

        args:
            array data:         the training data (a gnumpy.array)
            list[int] epochs:   the number of training epochs for each RBM
            float eta:          the learning rate
        '''
        layers = []
        vis = data
        for i in range(len(self.layer_sizes)-1):
            self.stream.write("Pretraining RBM %d, vis=%d, hid=%d" % (i+1, self.layer_sizes[i],
                    self.layer_sizes[i+1]))
            g_rbm = RBM(self.layer_sizes[i], self.layer_sizes[i+1], 
                    self.layer_types[i], self.layer_types[i+1], stream=self.stream)
            g_rbm.train(vis, epochs[i], eta)
            hid = self.get_activation(g_rbm, vis)
            vis = hid
            n_rbm = Holder(g_rbm)
            layers.append(n_rbm)
            gp.free_reuse_cache()
        self.network = layers
    def train(self, xs, epochs, eta, early_stop = True):
        '''
        Trains the deep net one RBM at a time

        args:
            array xs:         the training xs (a gnumpy.array)
            list[int] epochs:   the number of training epochs for each RBM
            float eta:          the learning rate
        '''
        top_layers = []
        vis = xs
        for i in range(len(self.layer_sizes)-1):
            print "Pretraining RBM %d, vis=%d, hid=%d" % (i+1, self.layer_sizes[i],
                    self.layer_sizes[i+1])
            g_rbm = RBM(self.layer_sizes[i], self.layer_sizes[i+1], self.layer_types[i], self.layer_types[i+1])
            g_rbm.train(vis, epochs[i], eta[i], sample = self.sample, early_stop = early_stop)
            hid = self.get_activation(g_rbm, vis)
            vis = hid
            n_rbm = Holder(g_rbm)
            top_layers.append(n_rbm)
            gp.free_reuse_cache()
        self.network = top_layers
        return vis
def mlpSingleOutput1Layer_costfunc(x, *args):
    inputSize, l1Size, lambda_hidden, inputs, targets = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    inputs = gpu.garray(inputs)
    targets = gpu.garray(targets)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1],
                                  (l1Size, inputSize + 1)))
    theta_output = gpu.garray(
        reshape(x[num_weights_L1:shape(x)[0]], (1, l1Size + 1)))
    inputs = gpu.concatenate((gpu.ones((1, numCases)), inputs), axis=0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_activation_L1 = gpu.concatenate((gpu.ones(
        (1, numCases)), hidden_activation_L1),
                                           axis=0)
    #hidden_activation_L1 = hidden_activation_L1 * dropout_prob
    hidden_sum_output = gpu.dot(theta_output, hidden_activation_L1)
    outputs = hidden_sum_output.logistic()
    output_target_diff = (outputs - targets)**2
    regularized_penalty_output = theta_output[:, 1:shape(theta_output)[1]]
    regularized_penalty_output = regularized_penalty_output * regularized_penalty_output
    regularized_penalty_L1 = theta_L1[:, 1:shape(theta_L1)[1]]
    regularized_penalty_L1 = regularized_penalty_L1 * regularized_penalty_L1
    cost = gpu.sum(output_target_diff) / (
        2 *
        numCases) + 0.5 * lambda_hidden * (gpu.sum(regularized_penalty_L1) +
                                           gpu.sum(regularized_penalty_output))
    print 'Multilayer Perceptron Cost:', cost
    del inputs
    del theta_L1
    del hidden_sum_L1
    del hidden_activation_L1
    del regularized_penalty_output
    del regularized_penalty_L1
    gpu.free_reuse_cache()
    return cost
Example #34
def train_kmeans_layer(X,
                       in_shape,
                       K,
                       ksize,
                       n_patches_per_image,
                       prep_type=None,
                       pad_h=0,
                       pad_w=0,
                       repeat=1,
                       **kwargs):
    train_data = get_random_patches(X,
                                    in_shape,
                                    ksize,
                                    n_patches_per_image,
                                    pad_h=pad_h,
                                    pad_w=pad_w)
    if prep_type is not None:
        prep = pp.choose_preprocessor_by_name(prep_type)
        prep.train(train_data)
        train_data = prep.process(train_data)
    else:
        prep = None

    C_best = None
    loss_best = None

    for i_repeat in xrange(repeat):
        print '*** repeat #%d ***' % (i_repeat + 1)
        gnp.free_reuse_cache()
        C, _, loss = clust.kmeans(train_data, K, **kwargs)
        if loss_best is None or loss < loss_best:
            loss_best = loss
            C_best = C

    print '>>> best loss: %.2f' % loss_best
    return KMeansModel(C_best, kwargs.get('dist', 'euclidean'), in_shape.c,
                       ksize, prep)
def mlpSingleOutput1Layer_grad(x, *args):
    inputSize, l1Size, lambda_hidden, inputs, targets = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_output = 1 * (l1Size+1)
    inputs = gpu.garray(inputs)
    targets = gpu.garray(targets)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1], (l1Size, inputSize + 1)))
    theta_output = gpu.garray(reshape(x[num_weights_L1:shape(x)[0]], (1, l1Size+1)))
    inputs = gpu.concatenate((gpu.ones((1,numCases)), inputs), axis = 0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_activation_L1 = gpu.concatenate((gpu.ones((1,numCases)), hidden_activation_L1), axis = 0)
    #hidden_activation_L1 = hidden_activation_L1 * dropout_prob
    hidden_sum_output = gpu.dot(theta_output, hidden_activation_L1)
    outputs = hidden_sum_output.logistic()
    theta_L1_grad = gpu.zeros(shape(theta_L1))
    theta_output_grad = gpu.zeros(shape(theta_output))
    a = (outputs - targets) * outputs * (1-outputs)
    theta_output_grad += gpu.dot(a, gpu.garray(transpose(hidden_activation_L1.as_numpy_array())))
    b_temp = gpu.dot(gpu.garray(transpose(theta_output.as_numpy_array())),a)
    b = (b_temp*hidden_activation_L1)*(1-hidden_activation_L1)
    delta2 = gpu.dot(b, gpu.garray(transpose(inputs.as_numpy_array())))
    theta_L1_grad += delta2[1:shape(delta2)[0], :]
    theta_L1_grad = theta_L1_grad/numCases
    theta_output_grad = theta_output_grad/numCases
    theta_output_grad[:,1:shape(theta_output_grad)[1]] = theta_output_grad[:,1:shape(theta_output_grad)[1]] + theta_output[:,1:shape(theta_output)[1]] * lambda_hidden
    theta_L1_grad[:,1:shape(theta_L1_grad)[1]] = theta_L1_grad[:,1:shape(theta_L1_grad)[1]] + theta_L1[:,1:shape(theta_L1)[1]] * lambda_hidden
    theta_output_grad = reshape(theta_output_grad.as_numpy_array(), num_weights_output)
    theta_L1_grad = reshape(theta_L1_grad.as_numpy_array(), num_weights_L1)
    del inputs
    del theta_L1
    del hidden_sum_L1
    del hidden_activation_L1
    gpu.free_reuse_cache()
    return hstack((theta_L1_grad,theta_output_grad))
Example #36
def test(opts):
    import editDistance as ed

    print "Testing model %s" % opts.inFile

    phone_map = get_phone_map_swbd()

    with open(opts.inFile, 'r') as fid:
        old_opts = pickle.load(fid)
        _ = pickle.load(fid)
        _ = pickle.load(fid)
        loader = dl.DataLoader(opts.dataDir, old_opts.rawDim, old_opts.inputDim)
        if 'layers' not in dir(old_opts):
            old_opts.layers = [old_opts.layerSize] * old_opts.numLayers
        nn = nnet.NNet(old_opts.inputDim, old_opts.outputDim, old_opts.layers, train=False)
        nn.initParams()
        nn.fromFile(fid)

    totdist = numphones = 0

    fid = open('hyp.txt', 'w')
    for i in range(1, opts.numFiles + 1):
        data_dict, alis, keys, sizes = loader.loadDataFileDict(i)
        for k in keys:
            gp.free_reuse_cache()
            hyp = nn.costAndGrad(data_dict[k])
            hyp = [phone_map[h] for h in hyp]
            ref = [phone_map[int(r)] for r in alis[k]]
            dist, ins, dels, subs, corr = ed.edit_distance(ref, hyp)
            print "Distance %d/%d" % (dist, len(ref))
            fid.write(k + ' ' + ' '.join(hyp) + '\n')
            totdist += dist
            numphones += len(alis[k])

    fid.close()
    print "PER : %f" % (100 * totdist / float(numphones))
def multilayer_feature_learning(data, inputSize, l1Size, l2Size, sparsityParam, lambda_val, beta):
    print "Now starting feature abstraction..."
    num_input = inputSize
    num_hidden_L1 = l1Size
    num_hidden_L2 = l2Size
    num_output_L1 = inputSize
    num_output_L2 = num_hidden_L1
    sparsityParam = sparsityParam
    lambda_val = lambda_val
    beta = beta
    inputs = gpu.garray(data)
    r = gpu.sqrt(6)/gpu.sqrt(num_hidden_L1+num_input+1)
    weights1_L1 = (gpu.rand(num_hidden_L1,num_input+1))*2*r-r
    weights2_L1 = (gpu.rand(num_output_L1,num_hidden_L1+1))*2*r-r
    num_weights1_L1 = (num_input+1)*num_hidden_L1
    num_weights2_L1 = (num_hidden_L1+1)*num_output_L1
    weights1_L1 = weights1_L1.reshape(num_weights1_L1)
    weights2_L1 = weights2_L1.reshape(num_weights2_L1)
    weights_L1 = hstack((weights1_L1.as_numpy_array(),weights2_L1.as_numpy_array()))
    print "Level 1 Abstraction Starting...."
    weights_L1 = linear_decoder_run_ReLU(data, weights_L1, num_input, num_hidden_L1)
    weights1_L1 = weights_L1[0:num_weights1_L1].reshape((num_hidden_L1,num_input+1))
    weights2_L1 = weights_L1[num_weights1_L1:shape(weights_L1)[0]].reshape((num_output_L1,num_hidden_L1+1))
    scipy.io.savemat('HiggsBosonLevel1.mat', mdict={'learntFeaturesL1_1': weights1_L1, 'learntFeaturesL1_2': weights2_L1})
    L1_activation = feedforward(weights1_L1, inputs)
    del weights_L1
    del weights1_L1
    del weights2_L1
    gpu.free_reuse_cache()
    v = gpu.sqrt(6)/gpu.sqrt(num_hidden_L2+num_hidden_L1+1)
    weights1_L2 = (gpu.rand(num_hidden_L2,num_hidden_L1+1))*2*v-v
    weights2_L2 = (gpu.rand(num_output_L2,num_hidden_L2+1))*2*v-v
    num_weights1_L2 = (num_hidden_L1+1)*num_hidden_L2
    num_weights2_L2 = (num_hidden_L2+1)*num_output_L2
    weights1_L2 = weights1_L2.reshape(num_weights1_L2)
    weights2_L2 = weights2_L2.reshape(num_weights2_L2)
    weights_L2 = hstack((weights1_L2.as_numpy_array(),weights2_L2.as_numpy_array()))
    print "Level 2 Abstraction Starting...."
    weights_L2 = linear_decoder_run_ReLU(L1_activation, weights_L2, num_hidden_L1, num_hidden_L2)
    weights1_L2 = weights_L2[0:num_weights1_L2].reshape((num_hidden_L2,num_hidden_L1+1))
    weights2_L2 = weights_L2[num_weights1_L2:shape(weights_L2)[0]].reshape((num_output_L2,num_hidden_L2+1))
    scipy.io.savemat('HiggsBosonLevel2.mat', mdict={'learntFeaturesL2_1': weights1_L2,'learntFeaturesL2_2': weights2_L2})
    L2_activation = feedforward(weights1_L2, L1_activation)
    del weights_L2
    del weights1_L2
    del weights2_L2
    gpu.free_reuse_cache()
    gpu.free_reuse_cache()
    print "Abstraction completed."
    return L2_activation
def mlpSoftmax_costfunc(x, *args):
    numClasses, inputSize, l1Size, l2Size, l3Size, lambda_softmax, lambda_hidden, inputs, labels, groundTruth, dropout_probability = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_L2 = l2Size * (l1Size + 1)
    num_weights_L3 = l3Size * (l2Size + 1)
    num_weights_softmax = numClasses * l3Size
    #x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1],
                                  (l1Size, inputSize + 1)))
    #theta_L1 = x[0:num_weights_L1].reshape((l1Size, inputSize + 1))
    #print numClasses, l2Size
    theta_L2 = gpu.garray(
        reshape(x[num_weights_L1:num_weights_L2 + num_weights_L1],
                (l2Size, l1Size + 1)))
    #theta_L2 = x[num_weights_L1:num_weights_L2+num_weights_L1].reshape((l2Size, l1Size + 1))
    theta_L3 = gpu.garray(
        reshape(
            x[num_weights_L2 + num_weights_L1:num_weights_L2 + num_weights_L1 +
              num_weights_L3], (l3Size, l2Size + 1)))
    theta_softmax = gpu.garray(
        reshape(
            x[num_weights_L2 + num_weights_L1 + num_weights_L3:shape(x)[0]],
            (numClasses, l3Size)))
    #theta_softmax = x[num_weights_L2+num_weights_L1:shape(x)[0]].reshape((numClasses, l2Size))
    theta_L1_grad = gpu.zeros(shape(theta_L1))
    theta_L2_grad = gpu.zeros(shape(theta_L2))
    theta_L3_grad = gpu.zeros(shape(theta_L3))
    dropout_l1 = gpu.garray(
        bernoulli.rvs(dropout_probability, size=(l1Size + 1, numCases)))
    dropout_l2 = gpu.garray(
        bernoulli.rvs(dropout_probability, size=(l2Size + 1, numCases)))
    dropout_l3 = gpu.garray(
        bernoulli.rvs(dropout_probability, size=(l3Size, numCases)))
    inputs = gpu.concatenate((gpu.ones((1, numCases)), inputs), axis=0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    #hidden_activation_L1 = gpu.log(1+hidden_sum_L1.exp())
    relu_mask_hidden1 = gpu.ones(shape(hidden_sum_L1)) * (hidden_sum_L1 > 0)
    hidden_activation_L1 = hidden_sum_L1 * relu_mask_hidden1
    hidden_derivative_L1 = relu_mask_hidden1
    #hidden_activation_L1 = gpu.concatenate((gpu.ones((1,numCases)), hidden_activation_L1), axis=0)
    hidden_derivative_L1 = gpu.concatenate((gpu.ones(
        (1, numCases)), hidden_derivative_L1),
                                           axis=0)
    hidden_activation_L1 = gpu.concatenate(
        (gpu.ones((1, numCases)), hidden_activation_L1), axis=0) * dropout_l1
    hidden_sum_L2 = gpu.dot(theta_L2, hidden_activation_L1)
    #hidden_activation_L2 = gpu.log(1+hidden_sum_L2.exp())
    relu_mask_hidden2 = gpu.ones(shape(hidden_sum_L2)) * (hidden_sum_L2 > 0)
    hidden_activation_L2 = hidden_sum_L2 * relu_mask_hidden2
    hidden_derivative_L2 = relu_mask_hidden2
    #hidden_activation_L2 = gpu.concatenate((gpu.ones((1,numCases)), hidden_activation_L2), axis=0)
    hidden_derivative_L2 = gpu.concatenate((gpu.ones(
        (1, numCases)), hidden_derivative_L2),
                                           axis=0)
    hidden_activation_L2 = gpu.concatenate(
        (gpu.ones((1, numCases)), hidden_activation_L2), axis=0) * dropout_l2
    hidden_sum_L3 = gpu.dot(theta_L3, hidden_activation_L2)
    #hidden_activation_L3 = gpu.log(1+hidden_sum_L3.exp())
    relu_mask_hidden3 = gpu.ones(shape(hidden_sum_L3)) * (hidden_sum_L3 > 0)
    #hidden_activation_L3 = hidden_sum_L3*relu_mask_hidden3
    hidden_derivative_L3 = relu_mask_hidden3
    hidden_activation_L3 = hidden_sum_L3 * relu_mask_hidden3 * dropout_l3
    #hidden_activation_L3 = hidden_sum_L3.logistic() * dropout_l3
    hidden_sum_softmax = gpu.dot(theta_softmax, hidden_activation_L3)
    hidden_sum_softmax = hidden_sum_softmax - hidden_sum_softmax.max(axis=0)
    predictions = hidden_sum_softmax.exp()
    predictions = predictions / gpu.sum(predictions, axis=0)
    pred = predictions.argmax(axis=0) + 1
    accuracy = mean(pred == labels) * 100
    temp = groundTruth * gpu.log(predictions)
    temp = temp.as_numpy_array()
    temp[temp == -inf] = -200.0
    temp = nan_to_num(temp)
    regularized_penalty_L1 = theta_L1[:, 1:shape(theta_L1)[1]]
    regularized_penalty_L2 = theta_L2[:, 1:shape(theta_L2)[1]]
    regularized_penalty_L3 = theta_L3[:, 1:shape(theta_L3)[1]]
    regularized_penalty_L1 = regularized_penalty_L1 * regularized_penalty_L1
    regularized_penalty_L2 = regularized_penalty_L2 * regularized_penalty_L2
    regularized_penalty_L3 = regularized_penalty_L3 * regularized_penalty_L3
    pred_cost = -1 * sum(temp) / numCases
    l2norm_cost = 0.5 * lambda_hidden * (
        gpu.sum(regularized_penalty_L3) + gpu.sum(regularized_penalty_L2) +
        gpu.sum(regularized_penalty_L1)) + 0.5 * lambda_softmax * gpu.sum(
            theta_softmax * theta_softmax)
    #l2norm_cost = 0
    cost = pred_cost + l2norm_cost
    print 'Prediction Accuracy:                       ', accuracy, '%'
    print 'Multilayer Softmax Prediction Cost:        ', pred_cost
    print 'Multilayer Softmax L2 Normalisation Cost:  ', l2norm_cost
    print 'Multilayer Softmax Cost:                   ', cost
    print '--------------------------------------------------------------------'
    softmax_imd = groundTruth - predictions
    #theta_softmax_grad = -1*gpu.dot(softmax_imd, gpu.garray(transpose(hidden_activation_L3.as_numpy_array())))/numCases
    theta_softmax_grad = -1 * gpu.dot(
        softmax_imd,
        gpu.garray(transpose(hidden_activation_L3.as_numpy_array()))
    ) / numCases + lambda_softmax * theta_softmax
    deltaOut = -softmax_imd
    delta_L3_imd = gpu.dot(
        gpu.garray(transpose(theta_softmax.as_numpy_array())), deltaOut)
    delta_L3_imd2 = delta_L3_imd * hidden_derivative_L3
    #delta_L3_imd2 = (delta_L3_imd * hidden_activation_L3) * (1-hidden_activation_L3)
    delta_L3 = gpu.dot(
        delta_L3_imd2,
        gpu.garray(transpose(hidden_activation_L2.as_numpy_array())))
    theta_L3_grad += delta_L3
    delta_L2_imd = gpu.dot(gpu.garray(transpose(theta_L3.as_numpy_array())),
                           delta_L3_imd2)
    delta_L2_imd2 = delta_L2_imd * hidden_derivative_L2
    delta_L2_imd2 = delta_L2_imd2[1:shape(delta_L2_imd2)[0] + 1, :]
    delta_L2 = gpu.dot(
        delta_L2_imd2,
        gpu.garray(transpose(hidden_activation_L1.as_numpy_array())))
    theta_L2_grad += delta_L2
    delta_L1_imd = gpu.dot(gpu.garray(transpose(theta_L2.as_numpy_array())),
                           delta_L2_imd2)
    delta_L1_imd2 = delta_L1_imd * hidden_derivative_L1
    delta_L1_imd2 = delta_L1_imd2[1:shape(delta_L1_imd2)[0] + 1, :]
    delta_L1 = gpu.dot(delta_L1_imd2,
                       gpu.garray(transpose(inputs.as_numpy_array())))
    theta_L1_grad += delta_L1
    theta_L1_grad = theta_L1_grad / numCases
    theta_L2_grad = theta_L2_grad / numCases
    theta_L3_grad = theta_L3_grad / numCases
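    # add the L2 weight-decay (lambda_hidden) term to the non-bias columns of each hidden-layer gradient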
    theta_L1_grad[:, 1:shape(theta_L1_grad)[1]] = theta_L1_grad[:, 1:shape(
        theta_L1_grad)[1]] + theta_L1[:, 1:shape(theta_L1)[1]] * lambda_hidden
    theta_L2_grad[:, 1:shape(theta_L2_grad)[1]] = theta_L2_grad[:, 1:shape(
        theta_L2_grad)[1]] + theta_L2[:, 1:shape(theta_L2)[1]] * lambda_hidden
    theta_L3_grad[:, 1:shape(theta_L3_grad)[1]] = theta_L3_grad[:, 1:shape(
        theta_L3_grad)[1]] + theta_L3[:, 1:shape(theta_L3)[1]] * lambda_hidden
    theta_L1_grad = reshape(theta_L1_grad.as_numpy_array(), num_weights_L1)
    theta_L2_grad = reshape(theta_L2_grad.as_numpy_array(), num_weights_L2)
    theta_L3_grad = reshape(theta_L3_grad.as_numpy_array(), num_weights_L3)
    theta_softmax_grad = reshape(theta_softmax_grad.as_numpy_array(),
                                 num_weights_softmax)
    del inputs
    del theta_L1
    del theta_L2
    del theta_L3
    del theta_softmax
    del hidden_sum_L1
    del hidden_activation_L1
    del hidden_sum_L2
    del hidden_activation_L2
    del hidden_activation_L3
    del hidden_sum_L3
    del hidden_sum_softmax
    del predictions
    del temp
    del softmax_imd
    del deltaOut
    del delta_L3_imd
    del delta_L3_imd2
    del delta_L3
    del delta_L2_imd
    del delta_L2_imd2
    del delta_L2
    del delta_L1_imd
    del delta_L1_imd2
    del delta_L1
    #del regularized_penalty_L1
    #del regularized_penalty_L2
    gpu.free_reuse_cache()
    return cost, hstack(
        (theta_L1_grad, theta_L2_grad, theta_L3_grad, theta_softmax_grad))
def running(inputData):
# multilayer_feature_learning(data, inputSize, l1Size, l2Size, l3Size, sparsityParam, lambda_val, beta)
#    inputSize = shape(win_data)[0]
    inputs = inputData
    inputSize = 96 * 96
    l1Size = 10000
    l2Size = 1024
    l3Size = 196
    sparsityParam = 0.1
    lambda_val = 3e-3
    beta = 3
    multilayer_feature_learning(inputs, inputSize, l1Size, l2Size, l3Size, sparsityParam, lambda_val, beta)
    weights1 = scipy.io.loadmat('MINSTLevel1.mat')['learntFeaturesL1_1']
    weights2 = scipy.io.loadmat('MINSTLevel2.mat')['learntFeaturesL2_1']
    weights3 = scipy.io.loadmat('MINSTLevel3.mat')['learntFeaturesL3_1']
    weights4 = scipy.io.loadmat('MINSTLevel3.mat')['learntFeaturesL3_2']
    weights5 = scipy.io.loadmat('MINSTLevel2.mat')['learntFeaturesL2_2']
    weights6 = scipy.io.loadmat('MINSTLevel1.mat')['learntFeaturesL1_2']
    gpu.free_reuse_cache()
# fine tuning phase
    print "Entering Final Stage: Fine Tuning the entire network..."
    num_input = inputSize
    num_hidden1 = l1Size
    num_hidden2 = l2Size
    num_hidden3 = l3Size
    num_hidden4 = l2Size
    num_hidden5 = l1Size
    num_output = num_input
    num_weights1 = (num_input+1)*num_hidden1
    num_weights2 = (num_hidden1+1)*num_hidden2
    num_weights3 = (num_hidden2+1)*num_hidden3
    num_weights4 = (num_hidden3+1)*num_hidden4
    num_weights5 = (num_hidden4+1)*num_hidden5
    num_weights6 = (num_hidden5+1)*num_output
    weights1 = reshape(weights1, num_weights1)
    weights2 = reshape(weights2, num_weights2)
    weights3 = reshape(weights3, num_weights3)
    weights4 = reshape(weights4, num_weights4)
    weights5 = reshape(weights5, num_weights5)
    weights6 = reshape(weights6, num_weights6)
    weights = hstack((weights1,weights2,weights3,weights4,weights5,weights6))
# inputSize, l1Size, l2Size, l3Size, l4Size, l5Size, lambda_val, inputs = args
    print "Fine Tuning Starting..."
    stepSize = 14702
    for i in range(int(shape(inputs)[1]/stepSize)):
        print "Batch:", i
        data = inputs[:,i*stepSize:(i+1)*stepSize]
        args = (num_input, num_hidden1, num_hidden2, num_hidden3, num_hidden4, num_hidden5, lambda_val, data)
        opttheta = optimize.fmin_l_bfgs_b(fine_tuning_cost_gpu, weights, fprime=fine_tuning_grad_gpu, args=args, maxiter=400)
        weights = opttheta[0]
        del opttheta
        gpu.free_reuse_cache()
    weights1 = reshape(weights[0:num_weights1], (l1Size, inputSize + 1))
    weights2 = reshape(weights[num_weights1:num_weights1+num_weights2], (l2Size, l1Size + 1))
    weights3 = reshape(weights[num_weights1+num_weights2:num_weights1+num_weights2+num_weights3], (l3Size, l2Size + 1))
    weights4 = reshape(weights[num_weights1+num_weights2+num_weights3:num_weights1+num_weights2+num_weights3+num_weights4], (num_hidden4, num_hidden3 + 1))
    weights5 = reshape(weights[num_weights1+num_weights2+num_weights3+num_weights4:num_weights1+num_weights2+num_weights3+num_weights4+num_weights5], (num_hidden5, num_hidden4 + 1))
    weights6 = reshape(weights[num_weights1+num_weights2+num_weights3+num_weights4+num_weights5:shape(weights)[0]], (inputSize, num_hidden5+1))
    scipy.io.savemat('MINST_FineTuned_features.mat', mdict={'learntFeaturesL1': weights1,'learntFeaturesL2': weights2, 'learntFeaturesL3': weights3, 'learntFeaturesL4': weights4, 'learntFeaturesL5': weights5, 'learntFeaturesL6': weights6})
    trainData = scipy.io.loadmat('trainData.mat')['trainData']
    train_weights1 = reshape(weights1, num_weights1)
    train_weights2 = reshape(weights2, num_weights2)
    train_weights3 = reshape(weights3, num_weights3)
    train_weights4 = reshape(weights4, num_weights4)
    train_weights5 = reshape(weights5, num_weights5)
    train_weights6 = reshape(weights6, num_weights6)
    train_weights = hstack((train_weights1,train_weights2,train_weights3,train_weights4,train_weights5,train_weights6))
    args = (num_input, num_hidden1, num_hidden2, num_hidden3, num_hidden4, num_hidden5, lambda_val, trainData)
    opttheta = optimize.fmin_l_bfgs_b(fine_tuning_cost_gpu, train_weights, fprime=fine_tuning_grad_gpu, args=args, maxiter=400)
    train_weights = opttheta[0]
    del opttheta
    gpu.free_reuse_cache()
    train_weights1 = reshape(train_weights[0:num_weights1], (l1Size, inputSize + 1))
    train_weights2 = reshape(train_weights[num_weights1:num_weights1+num_weights2], (l2Size, l1Size + 1))
    train_weights3 = reshape(train_weights[num_weights1+num_weights2:num_weights1+num_weights2+num_weights3], (l3Size, l2Size + 1))
    train_weights4 = reshape(train_weights[num_weights1+num_weights2+num_weights3:num_weights1+num_weights2+num_weights3+num_weights4], (num_hidden4, num_hidden3 + 1))
    train_weights5 = reshape(train_weights[num_weights1+num_weights2+num_weights3+num_weights4:num_weights1+num_weights2+num_weights3+num_weights4+num_weights5], (num_hidden5, num_hidden4 + 1))
    train_weights6 = reshape(train_weights[num_weights1+num_weights2+num_weights3+num_weights4+num_weights5:shape(train_weights)[0]], (inputSize, num_hidden5+1))
    testData = scipy.io.loadmat('testData.mat')['testData']
    nData = shape(testData)[1]
    x = concatenate((ones((1,nData)), testData), axis = 0)
    hidden1_sum = dot(train_weights1, x)
    hidden1_activation = 1/(1 + exp(-hidden1_sum))
    hidden1_activation = concatenate((ones((1,nData)), hidden1_activation), axis = 0)
    hidden2_sum = dot(train_weights2, hidden1_activation)
    hidden2_activation = 1/(1 + exp(-hidden2_sum))
    hidden2_activation = concatenate((ones((1,nData)), hidden2_activation), axis = 0)
    hidden3_sum = dot(train_weights3, hidden2_activation)
    hidden3_activation = 1/(1 + exp(-hidden3_sum))
    hidden3_activation = concatenate((ones((1,nData)), hidden3_activation), axis = 0)
    hidden4_sum = dot(train_weights4, hidden3_activation)
    hidden4_activation = 1/(1 + exp(-hidden4_sum))
    hidden4_activation = concatenate((ones((1,nData)), hidden4_activation), axis = 0)
    hidden5_sum = dot(train_weights5, hidden4_activation)
    hidden5_activation = 1/(1 + exp(-hidden5_sum))
    hidden5_activation = concatenate((ones((1,nData)), hidden5_activation), axis = 0)
    output_sum = dot(train_weights6, hidden5_activation)
    outputs = 1/(1 + exp(-output_sum))
    return outputs
def multilayer_feature_learning(data, inputSize, l1Size, l2Size, l3Size, sparsityParam, lambda_val, beta):
    print "Now starting feature abstraction..."
    num_input = inputSize
    num_hidden_L1 = l1Size
    num_hidden_L2 = l2Size
    num_hidden_L3 = l3Size
    num_output_L1 = inputSize
    num_output_L2 = num_hidden_L1
    num_output_L3 = num_hidden_L2
    sparsityParam = sparsityParam
    lambda_val = lambda_val
    beta = beta
    inputs = gpu.garray(data)
    r = gpu.sqrt(6)/gpu.sqrt(num_hidden_L1+num_input+1)
    weights1_L1 = (gpu.rand(num_hidden_L1,num_input+1))*2*r-r
    weights2_L1 = (gpu.rand(num_output_L1,num_hidden_L1+1))*2*r-r
    num_weights1_L1 = (num_input+1)*num_hidden_L1
    num_weights2_L1 = (num_hidden_L1+1)*num_output_L1
    #weights1_L1 = reshape(weights1_L1, num_weights1_L1)
    weights1_L1 = weights1_L1.reshape(num_weights1_L1)
    #weights2_L1 = reshape(weights2_L1, num_weights2_L1)
    weights2_L1 = weights2_L1.reshape(num_weights2_L1)
    weights_L1 = hstack((weights1_L1.as_numpy_array(),weights2_L1.as_numpy_array()))
    print "Level 1 Abstraction Starting...."
    args = (num_input, num_hidden_L1, num_output_L1, inputs, lambda_val, sparsityParam, beta)
    opttheta_L1 = optimize.fmin_l_bfgs_b(costfunc_gpu, weights_L1, fprime=grad_costfunc_gpu, args=args, maxiter=400)
    weights_L1 = gpu.garray(opttheta_L1[0])
    #weights1_L1 = reshape(weights_L1[0:num_weights1_L1],(num_hidden_L1,num_input+1))
    weights1_L1 = weights_L1[0:num_weights1_L1].reshape((num_hidden_L1,num_input+1))
    #weights2_L1 = reshape(weights_L1[num_weights1_L1:shape(weights_L1)[0]],(num_hidden_L2,num_hidden_L1+1))
    weights2_L1 = weights_L1[num_weights1_L1:shape(weights_L1)[0]].reshape((num_output_L1,num_hidden_L1+1))
    scipy.io.savemat('MINSTLevel1.mat', mdict={'learntFeaturesL1_1': weights1_L1.as_numpy_array(), 'learntFeaturesL1_2': weights2_L1.as_numpy_array()})
    L1_activation = feedforward(weights1_L1, inputs)
    del weights_L1
    del weights1_L1
    del weights2_L1
    gpu.free_reuse_cache()
    v = gpu.sqrt(6)/gpu.sqrt(num_hidden_L2+num_hidden_L1+1)
    weights1_L2 = (gpu.rand(num_hidden_L2,num_hidden_L1+1))*2*v-v
    weights2_L2 = (gpu.rand(num_output_L2,num_hidden_L2+1))*2*v-v
    num_weights1_L2 = (num_hidden_L1+1)*num_hidden_L2
    num_weights2_L2 = (num_hidden_L2+1)*num_output_L2
    #weights1_L2 = reshape(weights1_L2, num_weights1_L2)
    weights1_L2 = weights1_L2.reshape(num_weights1_L2)
    #weights2_L2 = reshape(weights2_L2, num_weights2_L2)
    weights2_L2 = weights2_L2.reshape(num_weights2_L2)
    weights_L2 = hstack((weights1_L2.as_numpy_array(),weights2_L2.as_numpy_array()))
    args = (num_hidden_L1, num_hidden_L2, num_output_L2, L1_activation, lambda_val, sparsityParam, beta)
    print "Level 2 Abstraction Starting...."
    opttheta_L2 = optimize.fmin_l_bfgs_b(costfunc_gpu, weights_L2, fprime=grad_costfunc_gpu, args=args, maxiter=400)
    weights_L2 = gpu.garray(opttheta_L2[0])
    #weights1_L2 = reshape(weights_L2[0:num_weights1_L2],(num_hidden_L2,num_hidden_L1+1))
    weights1_L2 = weights_L2[0:num_weights1_L2].reshape((num_hidden_L2,num_hidden_L1+1))
    weights2_L2 = weights_L2[num_weights1_L2:shape(weights_L2)[0]].reshape((num_output_L2,num_hidden_L2+1))
    scipy.io.savemat('MINSTLevel2.mat', mdict={'learntFeaturesL2_1': weights1_L2.as_numpy_array(),'learntFeaturesL2_2': weights2_L2.as_numpy_array()})
    L2_activation = feedforward(weights1_L2, L1_activation)
    del weights_L2
    del weights1_L2
    del weights2_L2
    gpu.free_reuse_cache()
    u = gpu.sqrt(6)/gpu.sqrt(num_hidden_L3+num_hidden_L2+1)
    weights1_L3 = (gpu.rand(num_hidden_L3,num_hidden_L2+1))*2*u-u
    weights2_L3 = (gpu.rand(num_output_L3,num_hidden_L3+1))*2*u-u
    num_weights1_L3 = (num_hidden_L2+1)*num_hidden_L3
    num_weights2_L3 = (num_hidden_L3+1)*num_output_L3
    #weights1_L3 = reshape(weights1_L3, num_weights1_L3)
    weights1_L3 = weights1_L3.reshape(num_weights1_L3)
    #weights2_L3 = reshape(weights2_L3, num_weights2_L3)
    weights2_L3 = weights2_L3.reshape(num_weights2_L3)
    weights_L3 = hstack((weights1_L3.as_numpy_array(),weights2_L3.as_numpy_array()))
    args = (num_hidden_L2, num_hidden_L3, num_output_L3, L2_activation, lambda_val, sparsityParam, beta)
    print "Level 3 Abstraction Starting...."
    opttheta_L3 = optimize.fmin_l_bfgs_b(costfunc_gpu, weights_L3, fprime=grad_costfunc_gpu, args=args, maxiter=400)
    weights_L3 = gpu.garray(opttheta_L3[0])
    #weights1_L3 = reshape(weights_L3[0:num_weights1_L3],(num_hidden_L3,num_hidden_L2+1))
    weights1_L3 = weights_L3[0:num_weights1_L3].reshape((num_hidden_L3,num_hidden_L2+1))
    weights2_L3 = weights_L3[num_weights1_L3:shape(weights_L3)[0]].reshape((num_output_L3,num_hidden_L3+1))
    scipy.io.savemat('MINSTLevel3.mat', mdict={'learntFeaturesL3_1': weights1_L3.as_numpy_array(),'learntFeaturesL3_2': weights2_L3.as_numpy_array()})
    L3_activation = feedforward(weights1_L3, L2_activation)
    del weights_L3
    del weights1_L3
    del weights2_L3
    gpu.free_reuse_cache()
    print "Abstraction completed."
    return L3_activation
Beispiel #41
0
def _classify(path, name, frames, channels, target, choices, CellObject):
    gnp.free_reuse_cache()
    #GPU TO USE, WE HAVE 2, I'D PREFER IF YOU USE GPU 0
    #whole images take up a lot of memory so we need to coordinate this.
    # if you're no longer using the notebook or a script, make sure to shut down or restart the notebook
    # you can use nvidia-smi in a terminal to see what processes are running on the GPU
    gnp._useGPUid = 0
    #protein localization categories
    localizationTerms=['ACTIN', 'BUDNECK', 'BUDTIP', 'CELLPERIPHERY', 'CYTOPLASM',
       'ENDOSOME', 'ER', 'GOLGI', 'MITOCHONDRIA', 'NUCLEARPERIPHERY',
       'NUCLEI', 'NUCLEOLUS', 'PEROXISOME', 'SPINDLE', 'SPINDLEPOLE',
       'VACUOLARMEMBRANE', 'VACUOLE']
    
    #normalization values (don't need to change)
    norm_vals = np.load('/home/morphology/mpg4/OrenKraus/Data_Sets/Yeast_Protein_Localization/Yolanda_Chong/overal_mean_std_for_single_cell_crops_based_on_Huh.npz')

    #may change to a better model (constantly training better networks)
    #gnumpy.track_memory_usage=True
    model_path = '/home/okraus/mil_models_backup/mil_models/Yeast_Protein_Localization/Yeast_NAND_a_10_scratch_Dropout_v5_MAP_early_stopping_best_model.npz'

    #load model and set evaluation type (MIL convolves across whole images)
    #change size
   

    curImages, sizes = getImageData(path, frames, channels)
    curImages = normalize_by_constant_values(curImages,norm_vals['means'],norm_vals['stdevs'])
    
    sizeX=sizes[1]
    sizeY=sizes[0]

    nn = modelEvalFunctions.loadResizedModel(model_path,sizeY,sizeX)
    model = modelEvalFunctions.evaluateModel_MIL(nn,localizationTerms,outputLayer='loc')

    
    nn.ForwardProp({'X0':gnp.garray(curImages)})

    # GET RATIOS OF CLASSES
    #values of prediction maps above
    pred_maps = nn._layers['MIL_pool'].Z[target-1].as_numpy_array()
    #calculate relative activation of each map
    area = pred_maps.sum(1).sum(1) / pred_maps.sum()
    #calculate absolute area of each map (optional)
    area2 = pred_maps.sum(1).sum(1) / (pred_maps.shape[1]*pred_maps.shape[2])
    #plot relative activations per class, use area or area2
    area_lib = {}

    jacobian = getJacobian(nn,frames)
    plt.imshow(jacobian[target-1,0])
    loc = str(settings.MEDIA_ROOT + '/classes/' + name.split('.')[0]+"_FULL0")
    save(loc)
    
    mahotas_segmentation = mahotas_clean_up_seg(jacobian,target-1)
    plt.imshow(mahotas_segmentation)
    loc = str(settings.MEDIA_ROOT + '/classes/' + name.split('.')[0]+"_FULL1")
    save(loc)

    show_segmentation_boundaries(curImages,mahotas_segmentation,target-1,sizeX, sizeY)
    loc = str(settings.MEDIA_ROOT + '/classes/' + name.split('.')[0]+"_FULL2")
    save(loc)

    top5indices = np.argsort(area)[::-1][:5]
    del jacobian
    del mahotas_segmentation

    for i in range(len(localizationTerms)):
        if i in top5indices:
            area_lib[localizationTerms[i]] = area[i]
            jacobian_per_class = getJacobian_per_class(nn,i,frames)
            im2show = mahotas_clean_up_seg(jacobian_per_class, target-1)
            overlay(curImages,im2show,target-1,sizeX, sizeY)
            loc = str(settings.MEDIA_ROOT + '/classes/' + name.split('.')[0]+"_"+localizationTerms[i])
            save(loc)
            np.save(loc, im2show)
            continue
        if localizationTerms[i] not in choices:
            continue
        area_lib[localizationTerms[i]] = area[i]
        jacobian_per_class = getJacobian_per_class(nn,i,frames)[target-1]
        im2show = np.int8(np.log(1+jacobian_per_class[0])>0.1+np.int8(np.log(1+jacobian_per_class[1])>1))>0
        im2show = mh.dilate(mh.dilate(mh.dilate(mh.erode(mh.erode(mh.erode(im2show>0))))))
        overlay(curImages,im2show,target-1,sizeX, sizeY)
        loc = str(settings.MEDIA_ROOT + '/classes/' + name.split('.')[0]+"_"+localizationTerms[i])
        save(loc)
        np.save(loc, im2show)
    del nn
    del model
    gnp.free_reuse_cache()
    f = [['Class', 'Area']]
    for key in area_lib:
        f.append([str(key), area_lib[key]])
    CellObject.activations = f
    CellObject.save()
    from openpyxl import Workbook
    wb = Workbook()
    ws = wb.active
    for arr in f:
        ws.append(arr)
    wb.save(settings.MEDIA_ROOT + '/classes/' + name.split('.')[0] + '.xlsx')
    if CellObject.email != '':
        send_mail('Deep Cell Vision',
                  'Your image has been classified. Go to http://deepcellvision.com/results/' + CellObject.name + ' to see your results',
                  '*****@*****.**', [CellObject.email], fail_silently=False)
    return
#filename = './data/sixteenth_note_bass_drum_patterns.mid'
filename = './data/blast_beat.mid'
start_beat = 1
offset = 8*16
T = max(model.Tv,model.Th)
seed_data = data_helper.get_seed_pattern(filename,model,offset)


sequence = model.generate(seed_data,num_steps,K,start_beat,noisy).reshape((-1,model.Nv)).T/model.vis_scale
##sequence = drum_matrix
beats = midi_tools.label_drum_matrix(sequence.shape[1],period=16,offset=0)
#
#quarters = plot_vstack_beat(sequence,beats)
sequence = sequence.as_numpy_array()

pl.clf()
pl.imshow(sequence,origin='lower')
pl.show()

ref_midi_file = filename
output_dir = './output/'
tatums_per_beat = 4
output_filename = 'generated_%u.midi' % np.random.randint(100000)
midioutput = midi_tools.drum_matrix_to_midi(sequence,tatums_per_beat,
        output_dir+output_filename,ref_midi_file)
print output_filename

gp.free_reuse_cache()


Beispiel #43
0
    def backprop_gradient(self, v, network, X, targets, weights):
        '''
        Calculates the value of the cost function and the gradient for CG 
        optimization.

        args:
            array v:            the 1d vector of weights
            list[obj] network:  the network
            array X:            training data
            array targets:      the training targets
            array weights:      the backprop weights
        returns:
            array cost:         the value of the cost function
            array grad:         the value of the gradient

        This function is called by scipy's minimize function during optimization
        '''
        if len(v.shape) == 1:
            v = v.reshape((v.shape[0],1))
        # initialize variables
        n = X.shape[0]
        numHiddenLayers = len(network)

        # put the v weights back into the network
        ind = 0
        for i in range(numHiddenLayers):
            h,w = network[i].W.shape
            network[i].W = gp.garray((v[ind:(ind+h*w)]).reshape((h,w)))
            ind += h*w
            b = network[i].hbias.shape[0]
            network[i].hbias = gp.garray(v[ind:(ind+b)]).reshape((b,1))
            ind += b

        # Run data through the network, keeping activations of each layer
        acts = [X] # a list of numpy arrays
        hid = X
        for layer in network:
            vis = gp.garray(hid)
            hid = self.get_activation(layer, vis) 
            acts.append(hid)
            gp.free_reuse_cache()

        # store the gradients
        dW = []
        db = []

        # Compute the value of the cost function
        if self.targetCost == 'crossEntropy':
            # see www.stanford.edu/group/pdplab/pdphandbook/handbookch6.html
            cost = (-1.0/n) * np.sum(np.sum(targets * np.log(acts[-1]) + \
                    (1.0 - targets) * np.log(1.0 - acts[-1]), axis=1) * weights.T)
            Ix = (acts[-1] - targets) / n
        else: #self.targetCost == 'linSquaredErr':
            cost = 0.5 * np.sum(np.sum(np.square(acts[-1] - targets), axis=1) * \
                    weights.T)
            Ix = (acts[-1] - targets)
        Ix *= np.tile(weights, (1, Ix.shape[1])).reshape((Ix.shape[0],Ix.shape[1]))
        Ix = gp.garray(Ix)

        # Compute the gradients
        for i in range(numHiddenLayers-1,-1,-1):
            # augment activations with ones
            acts[i] = gp.garray(acts[i])
            acts[i] = gp.concatenate((acts[i], gp.ones((n,1))), axis=1)

            # compute delta in next layer
            delta = gp.dot(acts[i].T, Ix)

            # split delta into weights and bias parts
            dW.append(delta[:-1,:].T)
            db.append(delta[-1,:].T)

            # backpropagate the error
            if i > 0:
                if network[i-1].hidtype == 'sigmoid':
                    Ix = gp.dot(Ix,gp.concatenate((network[i].W,network[i].hbias),
                        axis=1)) * acts[i] * (1.0 - acts[i])
                elif network[i-1].hidtype == 'gaussian':
                    Ix = gp.dot(Ix,gp.concatenate((network[i].W,network[i].hbias),
                        axis=1))
                Ix = Ix[:,:-1]
            gp.free_reuse_cache()
        dW.reverse()
        db.reverse()

        # Convert gradient information
        grad = np.zeros_like(v)
        ind = 0
        for i in range(numHiddenLayers):
            grad[ind:(ind+dW[i].size)] = \
                 (dW[i].reshape((dW[i].shape[0]*dW[i].shape[1],1))).as_numpy_array()
            ind += dW[i].size
            grad[ind:(ind+db[i].size),0] = db[i].as_numpy_array()
            ind += db[i].size
        grad = grad.reshape((grad.shape[0],))
        return cost, grad  
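    # A minimal usage sketch (not from the original source), following the
    # docstring note that this method is driven by scipy's minimize during CG
    # optimization.  `ft`, `network`, `X`, `targets` and `weights` below are
    # hypothetical placeholders for an instance of this class and its
    # fine-tuning inputs:
    #
    #     from scipy.optimize import minimize
    #     v0 = ...  # 1d vector of all layer weights and biases, stacked
    #     res = minimize(ft.backprop_gradient, v0,
    #                    args=(network, X, targets, weights),
    #                    jac=True, method='CG', options={'maxiter': 100})
    #     v_opt = res.x  # optimized weight vector, same layout as v0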
Beispiel #44
0
    def train(self):
        config = self.config

        # convert t into a matrix in 1-of-K representation if it is a vector
        t = self.train_data.T
        T_matrix = self.output.act_type.label_vec_to_mat(t, self.train_data.K)

        layer_config = LayerConfig()
        layer_config.learn_rate = config.learn_rate
        layer_config.momentum = config.init_momentum
        layer_config.weight_decay = config.weight_decay

        nnstore = NNStore()
        nnstore.init_from_net(self)

        best_net = NNStore()
        best_net.init_from_net(self)

        train_acc, val_acc, test_acc = self.display_training_info(
                -1, 
                self._compute_loss(
                    self.train_data.X, T_matrix, config.minibatch_size),
                0)
        acc_rec = np.zeros((config.num_epochs / config.epoch_to_display + 1, 4))
        acc_rec[0, 0] = 0
        acc_rec[0, 1] = train_acc
        if config.is_val:
            acc_rec[0, 2] = val_acc
        if config.is_test:
            acc_rec[0, 3] = test_acc

        t_start = time.time()

        best_acc = val_acc
        if self.config.is_test:
            best_test_acc = test_acc
        best_epoch = -1

        for epoch in range(0, config.num_epochs):
            gnp.free_reuse_cache()

            # decrease learning rate over time
            layer_config.learn_rate = config.learn_rate / \
                    (epoch / config.lr_drop_rate + 1)

            # TODO [dirty] special for Lnsvm
            if isinstance(self.output.act_type, act.LnsvmVariantOutput):
                #self.output.act_type.n = 3.0 - (3.0 - 0.5) / 50 * epoch
                self.output.act_type.n = 0.5
                if self.output.act_type.n < 0.5:
                    self.output.act_type.n = 0.5 

                if (epoch + 1) % config.epoch_to_display == 0:
                    print 'n %.4f' % self.output.act_type.n,
            
            if epoch >= config.switch_epoch:
                layer_config.momentum = config.final_momentum

            # shuffle the dataset 
            idx = np.random.permutation(self.num_total_cases)
            #idx = np.arange(self.num_total_cases)
            train_X = self.train_data.X[idx]
            train_T = T_matrix[idx]

            if config.input_noise > 0:
                train_X = train_X * (gnp.rand(train_X.shape) > config.input_noise)
                # train_X = train_X + gnp.randn(train_X.shape) * config.input_noise

            loss = 0

            for batch in range(0, self.num_minibatches):
                i_start = batch * config.minibatch_size
                if not batch == self.num_minibatches - 1:
                    i_end = i_start + config.minibatch_size
                else:
                    i_end = self.num_total_cases

                X = train_X[i_start:i_end]
                T = train_T[i_start:i_end]

                # forward pass
                self._forward(X)

                # compute loss
                loss += self.output.loss(T)

                if self.output.Y.isnan().any():
                    import ipdb
                    ipdb.set_trace()
                    print 'batch #%d <-- nan' % batch

                # backprop
                dLdXabove = self.output.backprop(layer_config)
                for i in range(self.num_layers-1, -1, -1):
                    dLdXabove = self.layer[i].backprop(dLdXabove, layer_config)

            # statistics
            avg_loss = 1.0 * loss / self.num_total_cases

            if (epoch + 1) % config.epoch_to_display == 0:
                train_acc, val_acc, test_acc = self.display_training_info(
                        epoch, avg_loss, time.time() - t_start)

                if val_acc is None:
                    val_acc = train_acc

                if (config.show_task_loss and val_acc < best_acc) or \
                        (not config.show_task_loss and val_acc > best_acc):
                    best_acc = val_acc
                    best_net.update_from_net(self)
                    if config.is_test:
                        best_test_acc = test_acc
                    best_epoch = epoch
                t_start = time.time()
                acc_rec[(epoch + 1) / config.epoch_to_display, 0] = epoch + 1
                acc_rec[(epoch + 1) / config.epoch_to_display, 1] = train_acc
                if config.is_val:
                    acc_rec[(epoch + 1) / config.epoch_to_display, 2] = val_acc
                if config.is_test:
                    acc_rec[(epoch + 1) / config.epoch_to_display, 3] = test_acc

            if (epoch + 1) % config.epoch_to_save == 0:
                nnstore.update_from_net(self)
                nnstore.write(config.output_dir + '/m' + str(epoch + 1) + '.pdata')


        print '----------------------------------------------------------------'

        if config.show_task_loss:
            s = 'loss'
        else:
            s = 'acc'
        
        if config.is_val:
            print 'Best val_%s %.4f' % (s, best_acc),
        else:
            print 'Best train_%s %.4f' % (s, best_acc),

        if config.is_test:
            print '--> test_%s %.4f' % (s, best_test_acc),
        print 'at epoch %d' % (best_epoch + 1)

        if config.is_output:
            f = open('%s/acc_rec.pdata' % config.output_dir, 'w')
            pickle.dump(acc_rec, f, -1)
            f.close()

            self.write_config('%s/cfg.txt' % config.output_dir)

            # save the best net
            fname = config.output_dir + '/best_net.pdata'
            print 'Saving the best model to ' + fname
            best_net.write(fname)

        if config.is_test:
            return (best_acc, best_test_acc)
        else:
            return best_acc
def mlpSoftmax_grad(x, *args):
    numClasses, inputSize, l1Size, l2Size, lambda_softmax,lambda_hidden, inputs, labels, groundTruth = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_L2 = l2Size * (l1Size + 1)
    num_weights_softmax = numClasses * l2Size
    #x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1], (l1Size, inputSize + 1)))
    theta_L2 = gpu.garray(reshape(x[num_weights_L1:num_weights_L2+num_weights_L1], (l2Size, l1Size + 1)))
    theta_softmax = gpu.garray(reshape(x[num_weights_L2+num_weights_L1:shape(x)[0]], (numClasses, l2Size)))
    theta_L1_grad = gpu.zeros(shape(theta_L1))
    theta_L2_grad = gpu.zeros(shape(theta_L2))
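    # forward pass: two logistic (sigmoid) hidden layers followed by a softmax output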
    inputs = gpu.concatenate((gpu.ones((1,numCases)), inputs), axis = 0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_activation_L1 = gpu.concatenate((gpu.ones((1,numCases)), hidden_activation_L1), axis=0)
    hidden_sum_L2 = gpu.dot(theta_L2, hidden_activation_L1)
    hidden_activation_L2 = hidden_sum_L2.logistic()
    hidden_sum_softmax_imd = gpu.dot(theta_softmax, hidden_activation_L2)
    hidden_softmax_activation = hidden_sum_softmax_imd.logistic()
    hidden_sum_softmax = hidden_sum_softmax_imd - hidden_sum_softmax_imd.max(axis = 0)
    predictions = hidden_sum_softmax.exp()
    predictions = predictions / gpu.sum(predictions,axis = 0)
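    # backpropagation: softmax error propagated back through both sigmoid hidden layers, then averaged and regularized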
    softmax_imd = groundTruth - predictions
    theta_softmax_grad = -1*gpu.dot(softmax_imd, gpu.garray(transpose(hidden_activation_L2.as_numpy_array())))/numCases + lambda_softmax * theta_softmax
    deltaOut = -softmax_imd
    delta_L2_imd = gpu.dot(gpu.garray(transpose(theta_softmax.as_numpy_array())), deltaOut)
    #delta_L2_imd2 = multiply(multiply(delta_L2_imd, hidden_activation_L2), (1-hidden_activation_L2))
    delta_L2_imd2 = (delta_L2_imd*hidden_activation_L2)*(1-hidden_activation_L2)
    delta_L2 = gpu.dot(delta_L2_imd2, gpu.garray(transpose(hidden_activation_L1.as_numpy_array())))
    theta_L2_grad += delta_L2
    delta_L1_imd = gpu.dot(gpu.garray(transpose(theta_L2.as_numpy_array())), delta_L2_imd2)
    #delta_L1_imd2 = multiply(multiply(delta_L1_imd, hidden_activation_L1), (1-hidden_activation_L1))
    delta_L1_imd2 = (delta_L1_imd*hidden_activation_L1)*(1-hidden_activation_L1)
    delta_L1 = gpu.dot(delta_L1_imd2, gpu.garray(transpose(inputs.as_numpy_array())))
    theta_L1_grad += delta_L1[1:shape(theta_L1)[0]+1,:]
    theta_L1_grad = theta_L1_grad/numCases
    theta_L2_grad = theta_L2_grad/numCases
    theta_L1_grad[:, 1:shape(theta_L1_grad)[1]] = theta_L1_grad[:, 1:shape(theta_L1_grad)[1]] + theta_L1[:, 1: shape(theta_L1)[1]] * lambda_hidden
    theta_L2_grad[:, 1:shape(theta_L2_grad)[1]] = theta_L2_grad[:, 1:shape(theta_L2_grad)[1]] + theta_L2[:, 1: shape(theta_L2)[1]] * lambda_hidden
    theta_L1_grad = reshape(theta_L1_grad.as_numpy_array(), num_weights_L1)
    theta_L2_grad = reshape(theta_L2_grad.as_numpy_array(), num_weights_L2)
    theta_softmax_grad = reshape(theta_softmax_grad.as_numpy_array(), num_weights_softmax)
    del inputs
    del theta_L1
    del theta_L2
    del theta_softmax
    del hidden_sum_L1
    del hidden_activation_L1
    del hidden_sum_L2
    del hidden_activation_L2
    del hidden_sum_softmax
    del predictions
    del softmax_imd
    del deltaOut
    del delta_L2_imd
    del delta_L2_imd2
    del delta_L2
    del delta_L1_imd
    del delta_L1_imd2
    del delta_L1
    gpu.free_reuse_cache()
    return hstack((theta_L1_grad,theta_L2_grad, theta_softmax_grad))
Beispiel #46
0
def kmeans(X, K, init='plus', dist='euclidean', empty_action='singleton', max_iters=100, verbose=True):
    """
    X: NxD dataset, each row is one data point.
    init: method to choose initial cluster centers.  Available options: {
        'plus': k-means++, 
        'sample': randomly sample K data points,
        'random': generate K points uniformly at random from X's range }
    dist: distance metric to be used. Available options: {
        'euclidean': Euclidean distance. }
    empty_action: action to take when one cluster lost all its members.  
        Available options: {
        'singleton': create a new cluster to replace it using a point furthest 
            to the current center.
        'error': raise an exception. }
    max_iters: maximum number of iterations to run.
    verbose: if False, nothing will be printed during training.

    Return:
        C: KxD matrix, cluster centers, each row is one center
        idx: N-d vector, cluster assignments for each data point.
        loss: sum of distances for the dataset under the given distance metric.
    """
    t_start = time.time()
    gnp.free_reuse_cache()
    gnp.max_memory_usage = 3.8 * 1000 * 1000 * 1000

    def f_print(s, newline=True):
        if verbose:
            if newline:
                print s
            else:
                print s,
        else:
            pass

    f_print('Initializing k-means...', newline=False)
    X = gnp.as_garray(X)
    X_cpu = X.asarray().astype(np.float64)
    
    if isinstance(init, str):
        f_init = choose_initializer(init)
        C = f_init(X, K, dist=dist)
    elif isinstance(init, gnp.garray) or isinstance(init, np.ndarray):
        C = gnp.as_garray(init)
        print '[Warning] Init centers provided, K and init not used.'
        K = C.shape[0]

    f_dist = choose_distance_metric(dist)

    loss = 0
    idx = None
    prev_idx = None

    full_idx = np.arange(X.shape[0])
    f_print('done [%.2fs]' % (time.time() - t_start))

    t_start = time.time()
    i_iter = 0
    while i_iter <= max_iters:
        gnp.free_reuse_cache()
        f_print('iter %d,' % i_iter, newline=False)
        
        # use GPU to compute distance because it is fast,
        # but go back to CPU to avoid low precision problems
        D = f_dist(X, C).asarray().astype(np.float64)
        idx = D.argmin(axis=1)
        loss = D[full_idx, idx].sum()

        if prev_idx is not None and (idx == prev_idx).all():
            print '** k-means converged **'
            break
        else:
            prev_idx = idx

        # update cluster center
        do_restart = False
        for k in xrange(K):
            k_idx = full_idx[idx == k]
            if k_idx.size == 0:
                if empty_action == 'singleton':
                    # update C
                    C[k] = X[f_dist(X, C[k:k+1]).ravel().argmax()]
                    do_restart = True
                elif empty_action == 'error':
                    raise Exception('Empty cluster encountered in k-means!')
                else:
                    raise Exception('Action not specified for empty cluster.')
            else:
                C[k] = X_cpu[k_idx].mean(axis=0)

        f_print('loss=%.2f, [%.2fs]' % (loss, time.time() - t_start))

        if do_restart:
            print '[Warning] restarting because empty clusters encountered.'
            i_iter = 0

        t_start = time.time()

        i_iter += 1

    return C, idx, loss
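# A minimal usage sketch (not part of the original source): cluster a small
# synthetic dataset with k-means++ initialization, as described in the
# docstring above.  The data matrix and K below are illustrative assumptions.
if __name__ == '__main__':
    demo_X = np.random.randn(1000, 64)      # 1000 points in 64 dimensions
    demo_C, demo_idx, demo_loss = kmeans(demo_X, 10, init='plus', dist='euclidean',
                                         empty_action='singleton', max_iters=50)
    print 'centers:', demo_C.shape, 'assignments:', demo_idx.shape, 'loss:', demo_loss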
Beispiel #47
0
def garbage_collect():
    global _gnumpy_loaded
    if _gnumpy_loaded:
        gp.free_reuse_cache(True)
    gc.collect()