def apply_nn_test(P, net, nCxt, outLayer, feat_dir, FeatList, outFeatDir, useDropout):
    """Sends the test features for feedforward, and applies the PCA calculated from training files"""
    fdir = ''
    inFeatList = open(feat_dir + FeatList).readlines()
    for fname in inFeatList:
        if fname == '\n':
            continue
        elif fname.rstrip()[-1] == ':':
            fdir = fname.rstrip()[:-1] + '/'
            print fdir
            continue
        elif fname.rstrip()[-3:] == 'txt':
            utt = np.loadtxt(feat_dir + fdir + fname[:-1])
            # if not useDropout:
            outputs = gpu.as_numpy_array(net.fprop_xf(utt, outLayer))
            # else:
            #     outputs = gpu.as_numpy_array(net.fpropDropout(utt, outLayer))
            assert outputs.shape[1] == 40
            outputs = np.dot(outputs, P)
            # if i/1*1 == i:
            #     gpu.free_reuse_cache()
            outfile = htkmfc.HTKFeat_write(feat_dir + outFeatDir + 'test_feat/' + fdir[-9:] + fname[:-5],
                                           outputs.shape[1], htkmfc.USER)
            outfile.writeall(outputs)
            del outfile
            del outputs
            gpu.free_reuse_cache()

def gradcheck(epsilon=1e-4):
    import dataLoader as dl
    import random
    loader = dl.DataLoader('/scail/group/deeplearning/speech/awni/kaldi-stanford/kaldi-trunk/egs/timit/s5/exp/nn_train/', 41*23, 41*23)
    nn = NNet(41*23, 41*23, [1024])
    nn.initParams()
    data_dict, alis, keys, sizes = loader.loadDataFileDict(1)
    k = random.sample(keys, 1)[0]
    data = gp.garray(data_dict[k])
    labels = np.array(alis[k], dtype=np.int32)
    cost, grad, _ = nn.costAndGrad(data, labels)
    print data.shape
    print labels.shape
    while True:
        m, n = nn.stack[1][0].shape
        msample, nsample = random.randint(0, m-1), random.randint(0, n-1)
        nn.stack[1][0][msample, nsample] += epsilon
        # keep `grad` from the unperturbed point so the analytic value is not overwritten
        cost2, _, _ = nn.costAndGrad(data, labels)
        nn.stack[1][0][msample, nsample] -= epsilon
        finite_diff = (cost2 - cost) / epsilon
        print "Analytic %.6f -- Finite %.6f" % (grad[1][0][msample, nsample], finite_diff)
        # Clear gp mem
        gp.free_reuse_cache()

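# A hedged aside on gradcheck() above: the one-sided difference (cost2 - cost)/epsilon has
# O(epsilon) truncation error, while a centered difference has O(epsilon^2). Minimal sketch
# only; it assumes the same NNet interface (nn.stack, nn.costAndGrad) as gradcheck and is
# not part of the original file.
def gradcheck_centered(nn, data, labels, i, j, epsilon=1e-4):
    W = nn.stack[1][0]
    W[i, j] += epsilon
    cost_plus, _, _ = nn.costAndGrad(data, labels)
    W[i, j] -= 2 * epsilon
    cost_minus, _, _ = nn.costAndGrad(data, labels)
    W[i, j] += epsilon  # restore the original weight
    return (cost_plus - cost_minus) / (2 * epsilon)
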
def linear_decoder_run(data, numInput, numHidden):
    print "Starting Feature Abstraction..."
    gpu.free_reuse_cache()
    num_input = numInput
    num_hidden = numHidden
    num_output = numInput
    lambda_val = 3e-4
    sparsityParam = 0.035
    beta = 5
    inputs = data
    r = sqrt(6) / sqrt(num_hidden + num_input + 1)
    weights1 = (random.rand(num_hidden, num_input + 1)) * 2 * r - r
    weights2 = (random.rand(num_output, num_hidden + 1)) * 2 * r - r
    num_weights1 = (num_input + 1) * num_hidden
    num_weights2 = (num_hidden + 1) * num_output
    weights1 = reshape(weights1, num_weights1)
    weights2 = reshape(weights2, num_weights2)
    weights = hstack((weights1, weights2))
    args = (num_input, num_hidden, num_output, inputs, lambda_val, sparsityParam, beta)
    opttheta = optimize.fmin_l_bfgs_b(costfunc, weights, fprime=grad_costfunc, args=args, maxiter=500)
    weights = opttheta[0]
    weights1 = reshape(weights[0:num_weights1], (num_hidden, num_input + 1))
    weights2 = reshape(weights[num_weights1:shape(weights)[0]], (num_output, num_hidden + 1))
    scipy.io.savemat('learntFeatures.mat', mdict={'learntFeatures': weights1})
    return weights1

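# Hedged usage sketch for linear_decoder_run above: the cost functions in this file treat
# columns of `inputs` as examples, so the data matrix is (features x cases). The patch
# matrix below is a random placeholder, not real data.
patches = random.rand(192, 10000)  # e.g. 8x8x3 patches, one per column
learnt_W1 = linear_decoder_run(patches, numInput=192, numHidden=196)
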
def mlpSingleOutput1Layer_costfunc(x, *args):
    inputSize, l1Size, lambda_hidden, inputs, targets = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    inputs = gpu.garray(inputs)
    targets = gpu.garray(targets)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1], (l1Size, inputSize + 1)))
    theta_output = gpu.garray(reshape(x[num_weights_L1:shape(x)[0]], (1, l1Size + 1)))
    inputs = gpu.concatenate((gpu.ones((1, numCases)), inputs), axis=0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_activation_L1 = gpu.concatenate((gpu.ones((1, numCases)), hidden_activation_L1), axis=0)
    #hidden_activation_L1 = hidden_activation_L1 * dropout_prob
    hidden_sum_output = gpu.dot(theta_output, hidden_activation_L1)
    outputs = hidden_sum_output.logistic()
    output_target_diff = (outputs - targets)**2
    regularized_penalty_output = theta_output[:, 1:shape(theta_output)[1]]
    regularized_penalty_output = regularized_penalty_output * regularized_penalty_output
    regularized_penalty_L1 = theta_L1[:, 1:shape(theta_L1)[1]]
    regularized_penalty_L1 = regularized_penalty_L1 * regularized_penalty_L1
    cost = gpu.sum(output_target_diff) / (2 * numCases) + 0.5 * lambda_hidden * (gpu.sum(regularized_penalty_L1) + gpu.sum(regularized_penalty_output))
    print 'Multilayer Perceptron Cost:', cost
    del inputs
    del theta_L1
    del hidden_sum_L1
    del hidden_activation_L1
    del regularized_penalty_output
    del regularized_penalty_L1
    gpu.free_reuse_cache()
    return cost

def costAndGradSFO(self, stack, datums):
    """
    Wrapper function used for SFO optimizer.
    """
    N = len(datums)
    cost = 0.
    grad = [[gp.zeros(w.shape), gp.zeros(b.shape)] for w, b in self.stack]
    # Push stack to device
    self.stack = [[gp.garray(w), gp.garray(b)] for w, b in stack]
    for datum in datums:
        data = gp.garray(self.data_dict[datum])
        labels = np.array(self.alis[datum], dtype=np.int32)
        costSingle, gradSingle, skip = self.costAndGrad(data, labels)
        if skip:
            print "LOGGING SKIP"
            # TODO what to do here?
            N -= 1
            continue
        grad = [[gs[0] + g[0], gs[1] + g[1]] for gs, g in zip(gradSingle, grad)]
        cost += costSingle
        # Have to force GC on the gpu... gnumpy lameness
        gp.free_reuse_cache()
    # Pull gradient from device
    grad = [[((1. / N) * gw).as_numpy_array(), ((1. / N) * gb).as_numpy_array()] for gw, gb in grad]
    cost *= 1. / N
    return cost, grad

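# Hedged sketch of how costAndGradSFO is meant to be driven by the Sum-of-Functions
# Optimizer (https://github.com/Sohl-Dickstein/Sum-of-Functions-Optimizer). The SFO
# import, the 10-way split of utterance keys, and the pass count are all assumptions;
# only the f_df signature (params, minibatch_ref) -> (cost, grad) comes from the code above.
from sfo import SFO
subsets = [keys[i::10] for i in range(10)]      # 10 subfunctions over utterance keys
optimizer = SFO(nn.costAndGradSFO, nn.stack, subsets)
nn.stack = optimizer.optimize(num_passes=5)     # returns the updated parameter list
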
def print_total_garray_size():
    gp.free_reuse_cache()
    tot = 0
    for obj in gc.get_objects():
        if isinstance(obj, gp.garray):
            tot += obj.size
    # garrays hold float32 values, so each element occupies 4 bytes
    print "Total GPU memory used by garrays: %.1f MB" % (tot * 4 / 1e6)
    print "Total GPU memory use reported by gnumpy: %.1f MB" % (gp.memory_in_use() / 1e6)

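# Hedged usage note for print_total_garray_size: it needs Python's gc module and gnumpy
# (imported here as gp), and is typically called between minibatches to spot garray leaks
# that free_reuse_cache alone cannot reclaim.
import gc
import gnumpy as gp
print_total_garray_size()
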
def grad_costfunc_gpu_ReLU(x, *args):
    num_input, num_hidden, num_output, inputs, lambda_val, sparsityParam, beta = args
    num_weights1 = (num_input + 1) * num_hidden
    num_weights2 = (num_hidden + 1) * num_output
    x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    weights1 = x[0:num_weights1].reshape((num_hidden, num_input + 1))
    weights2 = x[num_weights1:shape(x)[0]].reshape((num_output, num_hidden + 1))
    nData = shape(inputs)[1]
    data = gpu.concatenate((gpu.ones((1, nData)), inputs), axis=0)
    hidden_sum = gpu.dot(weights1, data)
    #hidden_activation = gpu.log(1+hidden_sum.exp())
    relu_mask_hidden1 = gpu.ones(shape(hidden_sum)) * (hidden_sum > 0)
    hidden_activation = hidden_sum * relu_mask_hidden1
    #hidden_derivative = hidden_sum.logistic()
    hidden_derivative = relu_mask_hidden1
    hidden_activation = gpu.concatenate((gpu.ones((1, nData)), hidden_activation), axis=0)
    hidden_derivative = gpu.concatenate((gpu.ones((1, nData)), hidden_derivative), axis=0)
    outputs = gpu.dot(weights2, hidden_activation)
    weights1_grad = gpu.zeros(shape(weights1))
    weights2_grad = gpu.zeros(shape(weights2))
    p = outputs - inputs
    weights2_grad += gpu.dot(p, gpu.garray(transpose(hidden_activation.as_numpy_array())))
    q_temp = gpu.dot(gpu.garray(transpose(weights2.as_numpy_array())), p)
    #q = multiply(multiply(q_temp,hidden_activation),(1-hidden_activation))
    q = q_temp * hidden_derivative
    delta2 = gpu.dot(q, gpu.garray(transpose(data.as_numpy_array())))
    weights1_grad += delta2[1:shape(delta2)[0], :]
    weights1_grad = weights1_grad / nData
    weights2_grad = weights2_grad / nData
    weights1_grad[:, 1:shape(weights1_grad)[1]] = weights1_grad[:, 1:shape(weights1_grad)[1]] + weights1[:, 1:shape(weights1)[1]] * lambda_val
    weights2_grad[:, 1:shape(weights2_grad)[1]] = weights2_grad[:, 1:shape(weights2_grad)[1]] + weights2[:, 1:shape(weights2)[1]] * lambda_val
    #weights1_grad = reshape(weights1_grad, num_weights1)
    weights1_grad = weights1_grad.reshape(num_weights1)
    #weights2_grad = reshape(weights2_grad, num_weights2)
    weights2_grad = weights2_grad.reshape(num_weights2)
    del x
    del inputs
    del data
    del p
    del q_temp
    del q
    del delta2
    del hidden_sum
    del hidden_activation
    del weights1
    del weights2
    gpu.free_reuse_cache()
    return hstack((weights1_grad.as_numpy_array(), weights2_grad.as_numpy_array()))

def run_through_network(self, xs):
    hid = xs
    for n_rbm in self.network:
        vis = gp.garray(hid)
        g_rbm = RBM(n_rbm.n_visible, n_rbm.n_hidden, n_rbm.vistype,
                    n_rbm.hidtype, n_rbm.W, n_rbm.hbias, n_rbm.vbias)
        hid = self.get_activation(g_rbm, vis)  # feed the garray copy, not the raw input
        gp.free_reuse_cache()
    return hid

def run_through_network(self, data):
    hid = data
    for n_rbm in self.network:
        #vis = gp.garray(hid)
        g_rbm = RBM(n_rbm.n_visible, n_rbm.n_hidden, n_rbm.vistype,
                    n_rbm.hidtype, n_rbm.W, n_rbm.hbias, n_rbm.vbias,
                    stream=self.stream)
        hid = self.get_activation(g_rbm, hid)  # propagate the previous layer's output, not the raw data
        gp.free_reuse_cache()
    return hid

def mlpSoftmax1Layer_grad(x, *args):
    numClasses, inputSize, l1Size, lambda_softmax, lambda_hidden, inputs, groundTruth = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_softmax = numClasses * l1Size
    inputs = gpu.garray(inputs)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1], (l1Size, inputSize + 1)))
    theta_softmax = gpu.garray(reshape(x[num_weights_L1:shape(x)[0]], (numClasses, l1Size)))
    theta_L1_grad = gpu.zeros(shape(theta_L1))
    inputs = gpu.concatenate((gpu.ones((1, numCases)), inputs), axis=0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    #hidden_activation_L1 = gpu.log(1+hidden_sum_L1.exp())
    #hidden_derivative_L1 = hidden_sum_L1.logistic()
    relu_mask_hidden1 = gpu.ones(shape(hidden_sum_L1)) * (hidden_sum_L1 > 0)
    hidden_activation_L1 = hidden_sum_L1 * relu_mask_hidden1
    #hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_derivative_L1 = relu_mask_hidden1
    hidden_sum_softmax_imd = gpu.dot(theta_softmax, hidden_activation_L1)
    hidden_sum_softmax = hidden_sum_softmax_imd - hidden_sum_softmax_imd.max(axis=0)
    predictions = hidden_sum_softmax.exp()
    predictions = predictions / gpu.sum(predictions, axis=0)
    softmax_imd = groundTruth - predictions
    theta_softmax_grad = -1 * gpu.dot(softmax_imd, gpu.garray(transpose(hidden_activation_L1.as_numpy_array()))) / numCases + lambda_softmax * theta_softmax
    deltaOut = -softmax_imd
    delta_L1_imd = gpu.dot(gpu.garray(transpose(theta_softmax.as_numpy_array())), deltaOut)
    delta_L1_imd2 = delta_L1_imd * hidden_derivative_L1
    #delta_L1_imd2 = (delta_L1_imd*hidden_activation_L1)*(1-hidden_activation_L1)
    delta_L1 = gpu.dot(delta_L1_imd2, gpu.garray(transpose(inputs.as_numpy_array())))
    theta_L1_grad += delta_L1
    theta_L1_grad = theta_L1_grad / numCases
    theta_L1_grad[:, 1:shape(theta_L1_grad)[1]] = theta_L1_grad[:, 1:shape(theta_L1_grad)[1]] + theta_L1[:, 1:shape(theta_L1)[1]] * lambda_hidden
    theta_L1_grad = reshape(theta_L1_grad.as_numpy_array(), num_weights_L1)
    theta_softmax_grad = reshape(theta_softmax_grad.as_numpy_array(), num_weights_softmax)
    del inputs
    del theta_L1
    del theta_softmax
    del hidden_sum_L1
    del hidden_activation_L1
    del hidden_sum_softmax
    del predictions
    del softmax_imd
    del deltaOut
    del delta_L1_imd
    del delta_L1_imd2
    del delta_L1
    gpu.free_reuse_cache()
    return hstack((theta_L1_grad, theta_softmax_grad))

def getJacobian(nn_input, frames):
    bfs = [i for i in nn_input.cfg.bfs()]
    bfs.reverse()
    Z_new = nn_input._layers['MIL_pool'].Z
    _, delta = nn_input._layers['MIL_pool']._ComputeParamGradient(
        Z_new * nn_input._layers['MIL_pool'].A.reshape(frames, 17, 1, 1))
    for l in bfs[3:-1]:
        gnp.free_reuse_cache()
        delta = nn_input._layers[l].BackProp(gnp.relu(delta))
    return delta.as_numpy_array()

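# Hedged usage sketch for getJacobian, mirroring how _classify later in this file drives
# it: run a forward pass first so MIL_pool's Z and A are populated, then backpropagate.
# `nn` and `images` stand in for a loaded model and a (frames, channels, h, w) batch.
nn.ForwardProp({'X0': gnp.garray(images)})
saliency = getJacobian(nn, frames=images.shape[0])  # numpy array of per-class maps
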
def mlpSoftmax_costfunc(x, *args):
    numClasses, inputSize, l1Size, l2Size, lambda_softmax, lambda_hidden, inputs, labels, groundTruth = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_L2 = l2Size * (l1Size + 1)
    #x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1], (l1Size, inputSize + 1)))
    #theta_L1 = x[0:num_weights_L1].reshape((l1Size, inputSize + 1))
    #print numClasses, l2Size
    theta_L2 = gpu.garray(reshape(x[num_weights_L1:num_weights_L2 + num_weights_L1], (l2Size, l1Size + 1)))
    #theta_L2 = x[num_weights_L1:num_weights_L2+num_weights_L1].reshape((l2Size, l1Size + 1))
    theta_softmax = gpu.garray(reshape(x[num_weights_L2 + num_weights_L1:shape(x)[0]], (numClasses, l2Size)))
    #theta_softmax = x[num_weights_L2+num_weights_L1:shape(x)[0]].reshape((numClasses, l2Size))
    inputs = gpu.concatenate((gpu.ones((1, numCases)), inputs), axis=0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_activation_L1 = gpu.concatenate((gpu.ones((1, numCases)), hidden_activation_L1), axis=0)
    hidden_sum_L2 = gpu.dot(theta_L2, hidden_activation_L1)
    hidden_activation_L2 = hidden_sum_L2.logistic()
    hidden_sum_softmax = gpu.dot(theta_softmax, hidden_activation_L2)
    hidden_sum_softmax = hidden_sum_softmax - hidden_sum_softmax.max(axis=0)
    predictions = hidden_sum_softmax.exp()
    predictions = predictions / gpu.sum(predictions, axis=0)
    temp = groundTruth * gpu.log(predictions)
    regularized_penalty_L1 = theta_L1[:, 1:shape(theta_L1)[1]]
    regularized_penalty_L2 = theta_L2[:, 1:shape(theta_L2)[1]]
    regularized_penalty_L1 = regularized_penalty_L1 * regularized_penalty_L1
    regularized_penalty_L2 = regularized_penalty_L2 * regularized_penalty_L2
    cost = -1 * gpu.sum(temp) / numCases + 0.5 * lambda_hidden * (gpu.sum(regularized_penalty_L1) + gpu.sum(regularized_penalty_L2)) + 0.5 * lambda_softmax * gpu.sum(theta_softmax * theta_softmax)
    print 'Multilayer Softmax Cost:', cost
    del inputs
    del theta_L1
    del theta_L2
    del theta_softmax
    del hidden_sum_L1
    del hidden_activation_L1
    del hidden_sum_L2
    del hidden_activation_L2
    del hidden_sum_softmax
    del predictions
    del temp
    del regularized_penalty_L1
    del regularized_penalty_L2
    gpu.free_reuse_cache()
    return cost

def getJacobian_per_class(nn_input, loc, frames):
    bfs = [i for i in nn_input.cfg.bfs()]
    bfs.reverse()
    Z_new = nn_input._layers['MIL_pool'].Z
    Z_inv = gnp.zeros(Z_new.shape)
    Z_inv[:, loc, :, :] = Z_new[:, loc, :, :] * nn_input._layers['MIL_pool'].A[:, loc].reshape(frames, 1, 1)
    _, delta = nn_input._layers['MIL_pool']._ComputeParamGradient(Z_inv)
    for l in bfs[3:-1]:
        gnp.free_reuse_cache()
        delta = nn_input._layers[l].BackProp(gnp.relu(delta))
    return delta.as_numpy_array()

def run_down_through_network(self, xs):
    hid = xs
    copy = self.network[:]
    copy.reverse()
    for n_rbm in copy:
        vis = gp.garray(hid)
        g_rbm = RBM(n_rbm.n_visible, n_rbm.n_hidden, n_rbm.vistype,
                    n_rbm.hidtype, n_rbm.W, n_rbm.hbias, n_rbm.vbias)
        hid = self.get_visible(g_rbm, vis)  # feed the garray copy, not the raw input
        gp.free_reuse_cache()
    return hid

def grad_costfunc_gpu(x, *args):
    num_input, num_hidden, num_output, inputs, lambda_val, sparsityParam, beta = args
    num_weights1 = (num_input + 1) * num_hidden
    num_weights2 = (num_hidden + 1) * num_output
    x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    weights1 = x[0:num_weights1].reshape((num_hidden, num_input + 1))
    weights2 = x[num_weights1:shape(x)[0]].reshape((num_output, num_hidden + 1))
    nData = shape(inputs)[1]
    data = gpu.concatenate((gpu.ones((1, nData)), inputs), axis=0)
    hidden_sum = gpu.dot(weights1, data)
    hidden_activation = hidden_sum.logistic()
    p_avg = gpu.sum(hidden_activation, axis=1) / nData
    grad_sparse = -1 * sparsityParam / p_avg.as_numpy_array() + (1 - sparsityParam) / (1 - p_avg.as_numpy_array())
    grad_sparse = append(0, grad_sparse)
    grad_sparse = tile(grad_sparse, (nData, 1))
    grad_sparse = gpu.garray(transpose(grad_sparse))
    hidden_activation = gpu.concatenate((gpu.ones((1, nData)), hidden_activation), axis=0)
    outputs = gpu.dot(weights2, hidden_activation)
    weights1_grad = gpu.zeros(shape(weights1))
    weights2_grad = gpu.zeros(shape(weights2))
    p = outputs - inputs
    weights2_grad += gpu.dot(p, gpu.garray(transpose(hidden_activation.as_numpy_array())))
    q_temp = gpu.dot(gpu.garray(transpose(weights2.as_numpy_array())), p) + beta * grad_sparse
    #q = multiply(multiply(q_temp,hidden_activation),(1-hidden_activation))
    q = (q_temp * hidden_activation) * (1 - hidden_activation)
    delta2 = gpu.dot(q, gpu.garray(transpose(data.as_numpy_array())))
    weights1_grad += delta2[1:shape(delta2)[0], :]
    weights1_grad = weights1_grad / nData
    weights2_grad = weights2_grad / nData
    weights1_grad[:, 1:shape(weights1_grad)[1]] = weights1_grad[:, 1:shape(weights1_grad)[1]] + weights1[:, 1:shape(weights1)[1]] * lambda_val
    weights2_grad[:, 1:shape(weights2_grad)[1]] = weights2_grad[:, 1:shape(weights2_grad)[1]] + weights2[:, 1:shape(weights2)[1]] * lambda_val
    #weights1_grad = reshape(weights1_grad, num_weights1)
    weights1_grad = weights1_grad.reshape(num_weights1)
    #weights2_grad = reshape(weights2_grad, num_weights2)
    weights2_grad = weights2_grad.reshape(num_weights2)
    del x
    del inputs
    del data
    del grad_sparse
    del p
    del q_temp
    del q
    del delta2
    del hidden_sum
    del hidden_activation
    del weights1
    del weights2
    gpu.free_reuse_cache()
    return hstack((weights1_grad.as_numpy_array(), weights2_grad.as_numpy_array()))

def costfunc_gpu(x, *args):
    num_input, num_hidden, num_output, inputs, noNoiseData, lambda_val, sparsityParam, beta = args
    num_weights1 = (num_input + 1) * num_hidden
    x = gpu.garray(x)
    # randomNoise = random.random_sample(shape(inputs))
    # criteriaTable = randomNoise > 0.32
    # inputs = inputs * criteriaTable
    inputs = gpu.garray(inputs)
    noNoiseData = gpu.garray(noNoiseData)
    #weights1 = gpu.garray(reshape(x[0:num_weights1],(num_hidden,num_input+1)))
    weights1 = x[0:num_weights1].reshape((num_hidden, num_input + 1))
    #weights2 = gpu.garray(reshape(x[num_weights1:shape(x)[0]], (num_output,num_hidden+1)))
    weights2 = x[num_weights1:shape(x)[0]].reshape((num_output, num_hidden + 1))
    nData = shape(inputs)[1]
    data = gpu.concatenate((gpu.ones((1, nData)), inputs), axis=0)
    hidden_sum = gpu.dot(weights1, data)
    hidden_activation = hidden_sum.logistic()
    p_avg = gpu.sum(hidden_activation, axis=1) / nData
    hidden_activation = gpu.concatenate((gpu.ones((1, nData)), hidden_activation), axis=0)
    output = gpu.dot(weights2, hidden_activation)
    regularized_penalty1 = weights1[:, 1:shape(weights1)[1]]
    regularized_penalty2 = weights2[:, 1:shape(weights2)[1]]
    regularized_penalty1 = regularized_penalty1 * regularized_penalty1
    regularized_penalty2 = regularized_penalty2 * regularized_penalty2
    output_target_diff = (output - noNoiseData) * (output - noNoiseData)
    KL = gpu.sum(sparsityParam * gpu.log(sparsityParam / p_avg) + (1 - sparsityParam) * gpu.log((1 - sparsityParam) / (1 - p_avg)))
    cost = gpu.sum(output_target_diff) / (2 * nData) + 0.5 * lambda_val * (gpu.sum(regularized_penalty1) + gpu.sum(regularized_penalty2)) + beta * KL
    print 'GPU Linear Denoising Decoder Cost: ', cost
    del x
    del inputs
    del noNoiseData
    del data
    del hidden_sum
    del hidden_activation
    del p_avg
    del output
    del regularized_penalty1
    del regularized_penalty2
    del weights1
    del weights2
    del output_target_diff
    gpu.free_reuse_cache()
    return cost

def mlpSingleOutput1Layer_grad(x, *args):
    inputSize, l1Size, lambda_hidden, inputs, targets = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_output = 1 * (l1Size + 1)
    inputs = gpu.garray(inputs)
    targets = gpu.garray(targets)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1], (l1Size, inputSize + 1)))
    theta_output = gpu.garray(reshape(x[num_weights_L1:shape(x)[0]], (1, l1Size + 1)))
    inputs = gpu.concatenate((gpu.ones((1, numCases)), inputs), axis=0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_activation_L1 = gpu.concatenate((gpu.ones((1, numCases)), hidden_activation_L1), axis=0)
    #hidden_activation_L1 = hidden_activation_L1 * dropout_prob
    hidden_sum_output = gpu.dot(theta_output, hidden_activation_L1)
    outputs = hidden_sum_output.logistic()
    theta_L1_grad = gpu.zeros(shape(theta_L1))
    theta_output_grad = gpu.zeros(shape(theta_output))
    a = (outputs - targets) * outputs * (1 - outputs)
    theta_output_grad += gpu.dot(a, gpu.garray(transpose(hidden_activation_L1.as_numpy_array())))
    b_temp = gpu.dot(gpu.garray(transpose(theta_output.as_numpy_array())), a)
    b = (b_temp * hidden_activation_L1) * (1 - hidden_activation_L1)
    delta2 = gpu.dot(b, gpu.garray(transpose(inputs.as_numpy_array())))
    theta_L1_grad += delta2[1:shape(delta2)[0], :]
    theta_L1_grad = theta_L1_grad / numCases
    theta_output_grad = theta_output_grad / numCases
    theta_output_grad[:, 1:shape(theta_output_grad)[1]] = theta_output_grad[:, 1:shape(theta_output_grad)[1]] + theta_output[:, 1:shape(theta_output)[1]] * lambda_hidden
    theta_L1_grad[:, 1:shape(theta_L1_grad)[1]] = theta_L1_grad[:, 1:shape(theta_L1_grad)[1]] + theta_L1[:, 1:shape(theta_L1)[1]] * lambda_hidden
    theta_output_grad = reshape(theta_output_grad.as_numpy_array(), num_weights_output)
    theta_L1_grad = reshape(theta_L1_grad.as_numpy_array(), num_weights_L1)
    del inputs
    del theta_L1
    del hidden_sum_L1
    del hidden_activation_L1
    gpu.free_reuse_cache()
    return hstack((theta_L1_grad, theta_output_grad))

def running(inputData, l1Size, l2Size):
    inputs = inputData
    inputSize = 30
    sparsityParam = 0.05
    lambda_val = 7e-5
    lambda_valFineTune = 1e-5
    beta = 3
    multilayer_feature_learning(inputs, inputSize, l1Size, l2Size, sparsityParam, lambda_val, beta)
    weights1 = scipy.io.loadmat('HiggsBosonLevel1.mat')['learntFeaturesL1_1']
    weights2 = scipy.io.loadmat('HiggsBosonLevel2.mat')['learntFeaturesL2_1']
    weights3 = scipy.io.loadmat('HiggsBosonLevel2.mat')['learntFeaturesL2_2']
    weights4 = scipy.io.loadmat('HiggsBosonLevel1.mat')['learntFeaturesL1_2']
    gpu.free_reuse_cache()
    print "Fine Tuning the abstraction network..."
    num_input = inputSize
    num_hidden1 = l1Size
    num_hidden2 = l2Size
    num_hidden3 = l1Size
    num_output = num_input
    num_weights1 = (num_input + 1) * num_hidden1
    num_weights2 = (num_hidden1 + 1) * num_hidden2
    num_weights3 = (num_hidden2 + 1) * num_hidden3
    num_weights4 = (num_hidden3 + 1) * num_output
    weights1 = reshape(weights1, num_weights1)
    weights2 = reshape(weights2, num_weights2)
    weights3 = reshape(weights3, num_weights3)
    weights4 = reshape(weights4, num_weights4)
    weights = hstack((weights1, weights2, weights3, weights4))
    print "Fine Tuning Starting..."
    stepSize = 200000
    for i in range(int(shape(inputs)[1] / stepSize)):
        print "Batch:", i
        data = inputs[:, i * stepSize:(i + 1) * stepSize]
        args = (num_input, num_hidden1, num_hidden2, num_hidden3, lambda_valFineTune, data)
        opttheta = optimize.fmin_l_bfgs_b(fine_tuning_cost_gpu, weights, fprime=fine_tuning_grad_gpu, args=args, maxiter=200)
        weights = opttheta[0]
        del opttheta
        gpu.free_reuse_cache()
    weights1 = reshape(weights[0:num_weights1], (l1Size, inputSize + 1))
    weights2 = reshape(weights[num_weights1:num_weights1 + num_weights2], (l2Size, l1Size + 1))
    weights3 = reshape(weights[num_weights1 + num_weights2:num_weights1 + num_weights2 + num_weights3], (num_hidden3, l2Size + 1))
    weights4 = reshape(weights[num_weights1 + num_weights2 + num_weights3:shape(weights)[0]], (inputSize, num_hidden3 + 1))
    scipy.io.savemat('HiggsBoson_FineTuned_features2Layers.mat',
                     mdict={'learntFeaturesL1': weights1, 'learntFeaturesL2': weights2,
                            'learntFeaturesL3': weights3, 'learntFeaturesL4': weights4})
    return weights1, weights2

def run_through_network(self, data, net=None):
    '''
    Gets the output of the top layer of the network given input data on
    the bottom.

    args:
        array data: the input data
        obj net: the network to use, default is self.network

    returns:
        array hid: the activation of the top layer
    '''
    if net is None:
        net = self.network
    hid = data
    for layer in net:
        vis = gp.garray(hid)
        hid = self.get_activation(layer, vis)
        gp.free_reuse_cache()
    return hid

def _compute_loss(self, X, T, batch_size=1000):
    n_total = X.shape[0]
    n_batches = n_total / batch_size   # Python 2 integer division
    loss = 0
    for i in range(n_batches):
        gnp.free_reuse_cache()
        i_start = i * batch_size
        if i < n_batches - 1:
            i_end = i_start + batch_size
        else:
            i_end = n_total   # the last batch absorbs the remainder
        Xbatch = X[i_start:i_end]
        Tbatch = T[i_start:i_end]
        self._forward(Xbatch)
        loss += self.output.loss(Tbatch)
    return loss / n_total

def costfunc_gpu_ReLU(x, *args):
    num_input, num_hidden, num_output, inputs, lambda_val, sparsityParam, beta = args
    num_weights1 = (num_input + 1) * num_hidden
    x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    #weights1 = gpu.garray(reshape(x[0:num_weights1],(num_hidden,num_input+1)))
    weights1 = x[0:num_weights1].reshape((num_hidden, num_input + 1))
    #weights2 = gpu.garray(reshape(x[num_weights1:shape(x)[0]], (num_output,num_hidden+1)))
    weights2 = x[num_weights1:shape(x)[0]].reshape((num_output, num_hidden + 1))
    nData = shape(inputs)[1]
    data = gpu.concatenate((gpu.ones((1, nData)), inputs), axis=0)
    hidden_sum = gpu.dot(weights1, data)
    #hidden_activation = gpu.log(1+hidden_sum.exp())
    relu_mask_hidden1 = gpu.ones(shape(hidden_sum)) * (hidden_sum > 0)
    hidden_activation = hidden_sum * relu_mask_hidden1
    hidden_activation = gpu.concatenate((gpu.ones((1, nData)), hidden_activation), axis=0)
    output = gpu.dot(weights2, hidden_activation)
    regularized_penalty1 = weights1[:, 1:shape(weights1)[1]]
    regularized_penalty2 = weights2[:, 1:shape(weights2)[1]]
    regularized_penalty1 = regularized_penalty1 * regularized_penalty1
    regularized_penalty2 = regularized_penalty2 * regularized_penalty2
    output_target_diff = (output - inputs) * (output - inputs)
    cost = gpu.sum(output_target_diff) / (2 * nData) + 0.5 * lambda_val * (gpu.sum(regularized_penalty1) + gpu.sum(regularized_penalty2))
    print 'GPU ReLU Linear Decoder Cost: ', cost
    del x
    del inputs
    del data
    del hidden_sum
    del hidden_activation
    del output
    del regularized_penalty1
    del regularized_penalty2
    del weights1
    del weights2
    del output_target_diff
    gpu.free_reuse_cache()
    return cost

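# Hedged sanity check for the ReLU pair above (costfunc_gpu_ReLU with
# grad_costfunc_gpu_ReLU): scipy.optimize.check_grad compares the analytic gradient
# against finite differences of the cost on a deliberately tiny random problem; all
# sizes and hyperparameters below are illustrative only.
num_input, num_hidden, num_output, nData = 8, 5, 8, 20
x0 = 0.01 * random.randn((num_input + 1) * num_hidden + (num_hidden + 1) * num_output)
tiny_inputs = random.rand(num_input, nData)
args = (num_input, num_hidden, num_output, tiny_inputs, 3e-4, 0.035, 5)
print 'gradient check error:', optimize.check_grad(costfunc_gpu_ReLU, grad_costfunc_gpu_ReLU, x0, *args)
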
def mlpSoftmax1Layer_costfunc(x, *args):
    numClasses, inputSize, l1Size, lambda_softmax, lambda_hidden, inputs, groundTruth = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    inputs = gpu.garray(inputs)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1], (l1Size, inputSize + 1)))
    theta_softmax = gpu.garray(reshape(x[num_weights_L1:shape(x)[0]], (numClasses, l1Size)))
    inputs = gpu.concatenate((gpu.ones((1, numCases)), inputs), axis=0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    #hidden_activation_L1 = gpu.log(1+hidden_sum_L1.exp())
    relu_mask_hidden1 = gpu.ones(shape(hidden_sum_L1)) * (hidden_sum_L1 > 0)
    hidden_activation_L1 = hidden_sum_L1 * relu_mask_hidden1
    #hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_sum_softmax = gpu.dot(theta_softmax, hidden_activation_L1)
    hidden_sum_softmax = hidden_sum_softmax - hidden_sum_softmax.max(axis=0)
    predictions = hidden_sum_softmax.exp()
    predictions = predictions / gpu.sum(predictions, axis=0)
    temp = groundTruth * gpu.log(predictions)
    temp = temp.as_numpy_array()
    temp[temp == -inf] = -200.0
    temp = nan_to_num(temp)
    regularized_penalty_L1 = theta_L1[:, 1:shape(theta_L1)[1]]
    regularized_penalty_L1 = regularized_penalty_L1 * regularized_penalty_L1
    cost = -1 * sum(temp) / numCases + 0.5 * lambda_hidden * (gpu.sum(regularized_penalty_L1)) + 0.5 * lambda_softmax * gpu.sum(theta_softmax * theta_softmax)
    print 'Multilayer Softmax Cost:', cost
    del inputs
    del theta_L1
    del theta_softmax
    del hidden_sum_L1
    del hidden_activation_L1
    del hidden_sum_softmax
    del predictions
    del temp
    del regularized_penalty_L1
    gpu.free_reuse_cache()
    return cost

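# Hedged sketch of driving the one-hidden-layer softmax pair (mlpSoftmax1Layer_costfunc
# above with mlpSoftmax1Layer_grad earlier in this file) through L-BFGS, mirroring the
# optimize.fmin_l_bfgs_b calls used elsewhere here; sizes, regularizers, and the random
# initialization are illustrative placeholders.
numClasses, inputSize, l1Size, numCases = 10, 784, 200, 1000
lambda_softmax, lambda_hidden = 1e-4, 1e-4
inputs = random.rand(inputSize, numCases)
groundTruth = zeros((numClasses, numCases))   # one-hot targets, one column per case
x0 = 0.01 * random.randn(l1Size * (inputSize + 1) + numClasses * l1Size)
args = (numClasses, inputSize, l1Size, lambda_softmax, lambda_hidden, inputs, groundTruth)
opttheta = optimize.fmin_l_bfgs_b(mlpSoftmax1Layer_costfunc, x0, fprime=mlpSoftmax1Layer_grad, args=args, maxiter=400)
x_opt = opttheta[0]
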
def test(opts):
    import editDistance as ed
    print "Testing model %s" % opts.inFile
    phone_map = get_phone_map_swbd()
    with open(opts.inFile, 'r') as fid:
        old_opts = pickle.load(fid)
        _ = pickle.load(fid)
        _ = pickle.load(fid)
        loader = dl.DataLoader(opts.dataDir, old_opts.rawDim, old_opts.inputDim)
        if 'layers' not in dir(old_opts):
            old_opts.layers = [old_opts.layerSize] * old_opts.numLayers
        nn = nnet.NNet(old_opts.inputDim, old_opts.outputDim, old_opts.layers, train=False)
        nn.initParams()
        nn.fromFile(fid)
    totdist = numphones = 0
    fid = open('hyp.txt', 'w')
    for i in range(1, opts.numFiles + 1):
        data_dict, alis, keys, sizes = loader.loadDataFileDict(i)
        for k in keys:
            gp.free_reuse_cache()
            hyp = nn.costAndGrad(data_dict[k])
            hyp = [phone_map[h] for h in hyp]
            ref = [phone_map[int(r)] for r in alis[k]]
            dist, ins, dels, subs, corr = ed.edit_distance(ref, hyp)
            print "Distance %d/%d" % (dist, len(ref))
            fid.write(k + ' ' + ' '.join(hyp) + '\n')
            totdist += dist
            numphones += len(alis[k])
    fid.close()
    print "PER : %f" % (100 * totdist / float(numphones))

def train_kmeans_layer(X, in_shape, K, ksize, n_patches_per_image,
                       prep_type=None, pad_h=0, pad_w=0, repeat=1, **kwargs):
    train_data = get_random_patches(X, in_shape, ksize, n_patches_per_image,
                                    pad_h=pad_h, pad_w=pad_w)
    if prep_type is not None:
        prep = pp.choose_preprocessor_by_name(prep_type)
        prep.train(train_data)
        train_data = prep.process(train_data)
    else:
        prep = None
    C_best = None
    loss_best = None
    for i_repeat in xrange(repeat):
        print '*** repeat #%d ***' % (i_repeat + 1)
        gnp.free_reuse_cache()
        C, _, loss = clust.kmeans(train_data, K, **kwargs)
        if loss_best is None or loss < loss_best:
            loss_best = loss
            C_best = C
    print '>>> best loss: %.2f' % loss_best
    return KMeansModel(C_best, kwargs.get('dist', 'euclidean'), in_shape.c, ksize, prep)

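# Hedged usage sketch for train_kmeans_layer: learn K=64 filters from 6x6 patches with a
# named preprocessor and three k-means restarts. `X`, `in_shape`, and the 'zca'
# preprocessor name are placeholders for the caller's image batch, shape descriptor
# (which must expose a `.c` channel count, per the return statement above), and whatever
# names pp.choose_preprocessor_by_name accepts; extra kwargs go straight to clust.kmeans.
km_model = train_kmeans_layer(X, in_shape, K=64, ksize=6, n_patches_per_image=10,
                              prep_type='zca', repeat=3)
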
def apply_nn_train_prePCA(net, nCxt, outLayer, feat_dir, FeatList, outFeatDir, Nframes, useDropout):
    """Sends the training features for feedforward and collects the output in a matrix X for performing PCA"""
    fdir = ''
    dim = net.weights[-2].shape[1]
    X = np.zeros((Nframes, dim))
    inFeatList = open(feat_dir + FeatList).readlines()
    fro = 0
    to = 0
    for fname in inFeatList:
        if fname.rstrip()[-1] == ':':
            fdir = fname.rstrip()[:-1] + '/'
            continue
        elif fname.rstrip()[-3:] == 'txt':
            utt = np.loadtxt(feat_dir + fdir + fname.rstrip())
            # if not useDropout:
            outputs = gpu.as_numpy_array(net.fprop_xf(utt, outLayer))
            # else:
            #     outputs = gpu.as_numpy_array(net.fpropDropout(utt, outLayer))
            assert outputs.shape[1] == 40
            fro = to
            to = fro + outputs.shape[0]
            # if X == None:
            #     X = outputs
            # else:
            X[fro:to] = outputs
            #     X = np.concatenate((X,outputs))
            # if i/1*1 == i:
            #     gpu.free_reuse_cache()
            # np.savetxt(feat_dir + outFeatDir + 'train_16k_prePCA/' + fname, gpu.as_numpy_array(outputs))
            np.save(feat_dir + outFeatDir + 'train_prePCA/' + fname[:-5], outputs)
            del outputs
            gpu.free_reuse_cache()
    # End of for
    return X

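# Hedged sketch of the PCA step that links apply_nn_train_prePCA (above) with
# apply_nn_test (earlier in this file): pool the training outputs into X, derive a
# projection P, and hand it to the test pass. The eigendecomposition form of the PCA is
# an assumption; the original projection code is not shown here.
X = apply_nn_train_prePCA(net, nCxt, outLayer, feat_dir, FeatList, outFeatDir, Nframes, useDropout)
Xc = X - X.mean(axis=0)
eigvals, eigvecs = np.linalg.eigh(np.dot(Xc.T, Xc) / Xc.shape[0])
P = eigvecs[:, ::-1]   # columns ordered by decreasing variance
apply_nn_test(P, net, nCxt, outLayer, feat_dir, FeatList, outFeatDir, useDropout)
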
def costfunc_gpu(x, *args):
    num_input, num_hidden, num_output, inputs, lambda_val, sparsityParam, beta = args
    num_weights1 = (num_input + 1) * num_hidden
    x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    #weights1 = gpu.garray(reshape(x[0:num_weights1],(num_hidden,num_input+1)))
    weights1 = x[0:num_weights1].reshape((num_hidden, num_input + 1))
    #weights2 = gpu.garray(reshape(x[num_weights1:shape(x)[0]], (num_output,num_hidden+1)))
    weights2 = x[num_weights1:shape(x)[0]].reshape((num_output, num_hidden + 1))
    nData = shape(inputs)[1]
    data = gpu.concatenate((gpu.ones((1, nData)), inputs), axis=0)
    hidden_sum = gpu.dot(weights1, data)
    hidden_activation = hidden_sum.logistic()
    p_avg = gpu.sum(hidden_activation, axis=1) / nData
    hidden_activation = gpu.concatenate((gpu.ones((1, nData)), hidden_activation), axis=0)
    output = gpu.dot(weights2, hidden_activation)
    regularized_penalty1 = weights1[:, 1:shape(weights1)[1]]
    regularized_penalty2 = weights2[:, 1:shape(weights2)[1]]
    regularized_penalty1 = regularized_penalty1 * regularized_penalty1
    regularized_penalty2 = regularized_penalty2 * regularized_penalty2
    output_target_diff = (output - inputs) * (output - inputs)
    KL = gpu.sum(sparsityParam * gpu.log(sparsityParam / p_avg) + (1 - sparsityParam) * gpu.log((1 - sparsityParam) / (1 - p_avg)))
    cost = gpu.sum(output_target_diff) / (2 * nData) + 0.5 * lambda_val * (gpu.sum(regularized_penalty1) + gpu.sum(regularized_penalty2)) + beta * KL
    print 'Linear Decoder Cost: ', cost
    del x
    del inputs
    del data
    del hidden_sum
    del hidden_activation
    del p_avg
    del output
    del regularized_penalty1
    del regularized_penalty2
    del weights1
    del weights2
    del output_target_diff
    gpu.free_reuse_cache()
    return cost

def train(self, data, epochs, eta):
    '''
    Trains the deep net one RBM at a time

    args:
        array data: the training data (a gnumpy.array)
        list[int] epochs: the number of training epochs for each RBM
        float eta: the learning rate
    '''
    layers = []
    vis = data
    for i in range(len(self.layer_sizes) - 1):
        print "Pretraining RBM %d, vis=%d, hid=%d" % (i + 1, self.layer_sizes[i], self.layer_sizes[i + 1])
        g_rbm = RBM(self.layer_sizes[i], self.layer_sizes[i + 1], self.layer_types[i], self.layer_types[i + 1])
        g_rbm.train(vis, epochs[i], eta)
        hid = self.get_activation(g_rbm, vis)
        vis = hid
        n_rbm = Holder(g_rbm)
        layers.append(n_rbm)
        gp.free_reuse_cache()
    self.network = layers

def train(self, data, epochs, eta):
    '''
    Trains the deep net one RBM at a time

    args:
        array data: the training data (a gnumpy.array)
        list[int] epochs: the number of training epochs for each RBM
        float eta: the learning rate
    '''
    layers = []
    vis = data
    for i in range(len(self.layer_sizes) - 1):
        self.stream.write("Pretraining RBM %d, vis=%d, hid=%d" % (i + 1, self.layer_sizes[i], self.layer_sizes[i + 1]))
        g_rbm = RBM(self.layer_sizes[i], self.layer_sizes[i + 1], self.layer_types[i], self.layer_types[i + 1], stream=self.stream)
        g_rbm.train(vis, epochs[i], eta)
        hid = self.get_activation(g_rbm, vis)
        vis = hid
        n_rbm = Holder(g_rbm)
        layers.append(n_rbm)
        gp.free_reuse_cache()
    self.network = layers

def train(self, xs, epochs, eta, early_stop=True):
    '''
    Trains the deep net one RBM at a time

    args:
        array xs: the training xs (a gnumpy.array)
        list[int] epochs: the number of training epochs for each RBM
        list[float] eta: the learning rate for each RBM
    '''
    top_layers = []
    vis = xs
    for i in range(len(self.layer_sizes) - 1):
        print "Pretraining RBM %d, vis=%d, hid=%d" % (i + 1, self.layer_sizes[i], self.layer_sizes[i + 1])
        g_rbm = RBM(self.layer_sizes[i], self.layer_sizes[i + 1], self.layer_types[i], self.layer_types[i + 1])
        g_rbm.train(vis, epochs[i], eta[i], sample=self.sample, early_stop=early_stop)
        hid = self.get_activation(g_rbm, vis)
        vis = hid
        n_rbm = Holder(g_rbm)
        top_layers.append(n_rbm)
        gp.free_reuse_cache()
    self.network = top_layers
    return vis

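# Hedged usage sketch for the greedy pretraining loops above: the DBN class name, layer
# sizes/types, epoch counts, and learning rates are all placeholders for whatever the
# surrounding codebase defines; only the train/run_through_network call pattern comes
# from the methods shown here.
dbn = DBN(layer_sizes=[784, 512, 256], layer_types=['sigmoid', 'sigmoid', 'sigmoid'])
dbn.train(gp.garray(train_X), epochs=[10, 10], eta=[0.05, 0.05])
codes = dbn.run_through_network(gp.garray(test_X))
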
def multilayer_feature_learning(data, inputSize, l1Size, l2Size, sparsityParam, lambda_val, beta):
    print "Now starting feature abstraction..."
    num_input = inputSize
    num_hidden_L1 = l1Size
    num_hidden_L2 = l2Size
    num_output_L1 = inputSize
    num_output_L2 = num_hidden_L1
    inputs = gpu.garray(data)
    r = gpu.sqrt(6) / gpu.sqrt(num_hidden_L1 + num_input + 1)
    weights1_L1 = (gpu.rand(num_hidden_L1, num_input + 1)) * 2 * r - r
    weights2_L1 = (gpu.rand(num_output_L1, num_hidden_L1 + 1)) * 2 * r - r
    num_weights1_L1 = (num_input + 1) * num_hidden_L1
    num_weights2_L1 = (num_hidden_L1 + 1) * num_output_L1
    weights1_L1 = weights1_L1.reshape(num_weights1_L1)
    weights2_L1 = weights2_L1.reshape(num_weights2_L1)
    weights_L1 = hstack((weights1_L1.as_numpy_array(), weights2_L1.as_numpy_array()))
    print "Level 1 Abstraction Starting...."
    weights_L1 = linear_decoder_run_ReLU(data, weights_L1, num_input, num_hidden_L1)
    weights1_L1 = weights_L1[0:num_weights1_L1].reshape((num_hidden_L1, num_input + 1))
    weights2_L1 = weights_L1[num_weights1_L1:shape(weights_L1)[0]].reshape((num_output_L1, num_hidden_L1 + 1))
    scipy.io.savemat('HiggsBosonLevel1.mat', mdict={'learntFeaturesL1_1': weights1_L1,
                                                    'learntFeaturesL1_2': weights2_L1})
    L1_activation = feedforward(weights1_L1, inputs)
    del weights_L1
    del weights1_L1
    del weights2_L1
    gpu.free_reuse_cache()
    v = gpu.sqrt(6) / gpu.sqrt(num_hidden_L2 + num_hidden_L1 + 1)
    weights1_L2 = (gpu.rand(num_hidden_L2, num_hidden_L1 + 1)) * 2 * v - v
    weights2_L2 = (gpu.rand(num_output_L2, num_hidden_L2 + 1)) * 2 * v - v
    num_weights1_L2 = (num_hidden_L1 + 1) * num_hidden_L2
    num_weights2_L2 = (num_hidden_L2 + 1) * num_output_L2
    weights1_L2 = weights1_L2.reshape(num_weights1_L2)
    weights2_L2 = weights2_L2.reshape(num_weights2_L2)
    weights_L2 = hstack((weights1_L2.as_numpy_array(), weights2_L2.as_numpy_array()))
    print "Level 2 Abstraction Starting...."
    weights_L2 = linear_decoder_run_ReLU(L1_activation, weights_L2, num_hidden_L1, num_hidden_L2)
    weights1_L2 = weights_L2[0:num_weights1_L2].reshape((num_hidden_L2, num_hidden_L1 + 1))
    weights2_L2 = weights_L2[num_weights1_L2:shape(weights_L2)[0]].reshape((num_output_L2, num_hidden_L2 + 1))
    scipy.io.savemat('HiggsBosonLevel2.mat', mdict={'learntFeaturesL2_1': weights1_L2,
                                                    'learntFeaturesL2_2': weights2_L2})
    L2_activation = feedforward(weights1_L2, L1_activation)
    del weights_L2
    del weights1_L2
    del weights2_L2
    gpu.free_reuse_cache()
    print "Abstraction completed."
    return L2_activation

def mlpSoftmax_costfunc(x, *args):
    numClasses, inputSize, l1Size, l2Size, l3Size, lambda_softmax, lambda_hidden, inputs, labels, groundTruth, dropout_probability = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_L2 = l2Size * (l1Size + 1)
    num_weights_L3 = l3Size * (l2Size + 1)
    num_weights_softmax = numClasses * l3Size
    #x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1], (l1Size, inputSize + 1)))
    #theta_L1 = x[0:num_weights_L1].reshape((l1Size, inputSize + 1))
    #print numClasses, l2Size
    theta_L2 = gpu.garray(reshape(x[num_weights_L1:num_weights_L2 + num_weights_L1], (l2Size, l1Size + 1)))
    #theta_L2 = x[num_weights_L1:num_weights_L2+num_weights_L1].reshape((l2Size, l1Size + 1))
    theta_L3 = gpu.garray(reshape(x[num_weights_L2 + num_weights_L1:num_weights_L2 + num_weights_L1 + num_weights_L3], (l3Size, l2Size + 1)))
    theta_softmax = gpu.garray(reshape(x[num_weights_L2 + num_weights_L1 + num_weights_L3:shape(x)[0]], (numClasses, l3Size)))
    #theta_softmax = x[num_weights_L2+num_weights_L1:shape(x)[0]].reshape((numClasses, l2Size))
    theta_L1_grad = gpu.zeros(shape(theta_L1))
    theta_L2_grad = gpu.zeros(shape(theta_L2))
    theta_L3_grad = gpu.zeros(shape(theta_L3))
    dropout_l1 = gpu.garray(bernoulli.rvs(dropout_probability, size=(l1Size + 1, numCases)))
    dropout_l2 = gpu.garray(bernoulli.rvs(dropout_probability, size=(l2Size + 1, numCases)))
    dropout_l3 = gpu.garray(bernoulli.rvs(dropout_probability, size=(l3Size, numCases)))
    inputs = gpu.concatenate((gpu.ones((1, numCases)), inputs), axis=0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    #hidden_activation_L1 = gpu.log(1+hidden_sum_L1.exp())
    relu_mask_hidden1 = gpu.ones(shape(hidden_sum_L1)) * (hidden_sum_L1 > 0)
    hidden_activation_L1 = hidden_sum_L1 * relu_mask_hidden1
    hidden_derivative_L1 = relu_mask_hidden1
    #hidden_activation_L1 = gpu.concatenate((gpu.ones((1,numCases)), hidden_activation_L1), axis=0)
    hidden_derivative_L1 = gpu.concatenate((gpu.ones((1, numCases)), hidden_derivative_L1), axis=0)
    hidden_activation_L1 = gpu.concatenate((gpu.ones((1, numCases)), hidden_activation_L1), axis=0) * dropout_l1
    hidden_sum_L2 = gpu.dot(theta_L2, hidden_activation_L1)
    #hidden_activation_L2 = gpu.log(1+hidden_sum_L2.exp())
    relu_mask_hidden2 = gpu.ones(shape(hidden_sum_L2)) * (hidden_sum_L2 > 0)
    hidden_activation_L2 = hidden_sum_L2 * relu_mask_hidden2
    hidden_derivative_L2 = relu_mask_hidden2
    #hidden_activation_L2 = gpu.concatenate((gpu.ones((1,numCases)), hidden_activation_L2), axis=0)
    hidden_derivative_L2 = gpu.concatenate((gpu.ones((1, numCases)), hidden_derivative_L2), axis=0)
    hidden_activation_L2 = gpu.concatenate((gpu.ones((1, numCases)), hidden_activation_L2), axis=0) * dropout_l2
    hidden_sum_L3 = gpu.dot(theta_L3, hidden_activation_L2)
    #hidden_activation_L3 = gpu.log(1+hidden_sum_L3.exp())
    relu_mask_hidden3 = gpu.ones(shape(hidden_sum_L3)) * (hidden_sum_L3 > 0)
    #hidden_activation_L3 = hidden_sum_L3*relu_mask_hidden3
    hidden_derivative_L3 = relu_mask_hidden3
    hidden_activation_L3 = hidden_sum_L3 * relu_mask_hidden3 * dropout_l3
    #hidden_activation_L3 = hidden_sum_L3.logistic() * dropout_l3
    hidden_sum_softmax = gpu.dot(theta_softmax, hidden_activation_L3)
    hidden_sum_softmax = hidden_sum_softmax - hidden_sum_softmax.max(axis=0)
    predictions = hidden_sum_softmax.exp()
    predictions = predictions / gpu.sum(predictions, axis=0)
    pred = predictions.argmax(axis=0) + 1
    accuracy = mean(pred == labels) * 100
    temp = groundTruth * gpu.log(predictions)
    temp = temp.as_numpy_array()
    temp[temp == -inf] = -200.0
    temp = nan_to_num(temp)
    regularized_penalty_L1 = theta_L1[:, 1:shape(theta_L1)[1]]
    regularized_penalty_L2 = theta_L2[:, 1:shape(theta_L2)[1]]
    regularized_penalty_L3 = theta_L3[:, 1:shape(theta_L3)[1]]
    regularized_penalty_L1 = regularized_penalty_L1 * regularized_penalty_L1
    regularized_penalty_L2 = regularized_penalty_L2 * regularized_penalty_L2
    regularized_penalty_L3 = regularized_penalty_L3 * regularized_penalty_L3
    pred_cost = -1 * sum(temp) / numCases
    l2norm_cost = 0.5 * lambda_hidden * (gpu.sum(regularized_penalty_L3) + gpu.sum(regularized_penalty_L2) + gpu.sum(regularized_penalty_L1)) + 0.5 * lambda_softmax * gpu.sum(theta_softmax * theta_softmax)
    #l2norm_cost = 0
    cost = pred_cost + l2norm_cost
    print 'Prediction Accuracy: ', accuracy, '%'
    print 'Multilayer Softmax Prediction Cost: ', pred_cost
    print 'Multilayer Softmax L2 Normalisation Cost: ', l2norm_cost
    print 'Multilayer Softmax Cost: ', cost
    print '--------------------------------------------------------------------'
    softmax_imd = groundTruth - predictions
    #theta_softmax_grad = -1*gpu.dot(softmax_imd, gpu.garray(transpose(hidden_activation_L3.as_numpy_array())))/numCases
    theta_softmax_grad = -1 * gpu.dot(softmax_imd, gpu.garray(transpose(hidden_activation_L3.as_numpy_array()))) / numCases + lambda_softmax * theta_softmax
    deltaOut = -softmax_imd
    delta_L3_imd = gpu.dot(gpu.garray(transpose(theta_softmax.as_numpy_array())), deltaOut)
    delta_L3_imd2 = delta_L3_imd * hidden_derivative_L3
    #delta_L3_imd2 = (delta_L3_imd * hidden_activation_L3) * (1-hidden_activation_L3)
    delta_L3 = gpu.dot(delta_L3_imd2, gpu.garray(transpose(hidden_activation_L2.as_numpy_array())))
    theta_L3_grad += delta_L3
    delta_L2_imd = gpu.dot(gpu.garray(transpose(theta_L3.as_numpy_array())), delta_L3_imd2)
    delta_L2_imd2 = delta_L2_imd * hidden_derivative_L2
    delta_L2_imd2 = delta_L2_imd2[1:shape(delta_L2_imd2)[0] + 1, :]
    delta_L2 = gpu.dot(delta_L2_imd2, gpu.garray(transpose(hidden_activation_L1.as_numpy_array())))
    theta_L2_grad += delta_L2
    delta_L1_imd = gpu.dot(gpu.garray(transpose(theta_L2.as_numpy_array())), delta_L2_imd2)
    delta_L1_imd2 = delta_L1_imd * hidden_derivative_L1
    delta_L1_imd2 = delta_L1_imd2[1:shape(delta_L1_imd2)[0] + 1, :]
    delta_L1 = gpu.dot(delta_L1_imd2, gpu.garray(transpose(inputs.as_numpy_array())))
    theta_L1_grad += delta_L1
    theta_L1_grad = theta_L1_grad / numCases
    theta_L2_grad = theta_L2_grad / numCases
    theta_L3_grad = theta_L3_grad / numCases
    theta_L1_grad[:, 1:shape(theta_L1_grad)[1]] = theta_L1_grad[:, 1:shape(theta_L1_grad)[1]] + theta_L1[:, 1:shape(theta_L1)[1]] * lambda_hidden
    theta_L2_grad[:, 1:shape(theta_L2_grad)[1]] = theta_L2_grad[:, 1:shape(theta_L2_grad)[1]] + theta_L2[:, 1:shape(theta_L2)[1]] * lambda_hidden
    theta_L3_grad[:, 1:shape(theta_L3_grad)[1]] = theta_L3_grad[:, 1:shape(theta_L3_grad)[1]] + theta_L3[:, 1:shape(theta_L3)[1]] * lambda_hidden
    theta_L1_grad = reshape(theta_L1_grad.as_numpy_array(), num_weights_L1)
    theta_L2_grad = reshape(theta_L2_grad.as_numpy_array(), num_weights_L2)
    theta_L3_grad = reshape(theta_L3_grad.as_numpy_array(), num_weights_L3)
    theta_softmax_grad = reshape(theta_softmax_grad.as_numpy_array(), num_weights_softmax)
    del inputs
    del theta_L1
    del theta_L2
    del theta_L3
    del theta_softmax
    del hidden_sum_L1
    del hidden_activation_L1
    del hidden_sum_L2
    del hidden_activation_L2
    del hidden_activation_L3
    del hidden_sum_L3
    del hidden_sum_softmax
    del predictions
    del temp
    del softmax_imd
    del deltaOut
    del delta_L3_imd
    del delta_L3_imd2
    del delta_L3
    del delta_L2_imd
    del delta_L2_imd2
    del delta_L2
    del delta_L1_imd
    del delta_L1_imd2
    del delta_L1
    #del regularized_penalty_L1
    #del regularized_penalty_L2
    gpu.free_reuse_cache()
    return cost, hstack((theta_L1_grad, theta_L2_grad, theta_L3_grad, theta_softmax_grad))

def running(inputData):
    # multilayer_feature_learning(data, inputSize, l1Size, l2Size, l3Size, sparsityParam, lambda_val, beta)
    # inputSize = shape(win_data)[0]
    inputs = inputData
    inputSize = 96 * 96
    l1Size = 10000
    l2Size = 1024
    l3Size = 196
    sparsityParam = 0.1
    lambda_val = 3e-3
    beta = 3
    multilayer_feature_learning(inputs, inputSize, l1Size, l2Size, l3Size, sparsityParam, lambda_val, beta)
    weights1 = scipy.io.loadmat('MINSTLevel1.mat')['learntFeaturesL1_1']
    weights2 = scipy.io.loadmat('MINSTLevel2.mat')['learntFeaturesL2_1']
    weights3 = scipy.io.loadmat('MINSTLevel3.mat')['learntFeaturesL3_1']
    weights4 = scipy.io.loadmat('MINSTLevel3.mat')['learntFeaturesL3_2']
    weights5 = scipy.io.loadmat('MINSTLevel2.mat')['learntFeaturesL2_2']
    weights6 = scipy.io.loadmat('MINSTLevel1.mat')['learntFeaturesL1_2']
    gpu.free_reuse_cache()
    # fine tuning phase
    print "Entering Final Stage: Fine Tuning the entire network..."
    num_input = inputSize
    num_hidden1 = l1Size
    num_hidden2 = l2Size
    num_hidden3 = l3Size
    num_hidden4 = l2Size
    num_hidden5 = l1Size
    num_output = num_input
    num_weights1 = (num_input + 1) * num_hidden1
    num_weights2 = (num_hidden1 + 1) * num_hidden2
    num_weights3 = (num_hidden2 + 1) * num_hidden3
    num_weights4 = (num_hidden3 + 1) * num_hidden4
    num_weights5 = (num_hidden4 + 1) * num_hidden5
    num_weights6 = (num_hidden5 + 1) * num_output
    weights1 = reshape(weights1, num_weights1)
    weights2 = reshape(weights2, num_weights2)
    weights3 = reshape(weights3, num_weights3)
    weights4 = reshape(weights4, num_weights4)
    weights5 = reshape(weights5, num_weights5)
    weights6 = reshape(weights6, num_weights6)
    weights = hstack((weights1, weights2, weights3, weights4, weights5, weights6))
    # inputSize, l1Size, l2Size, l3Size, l4Size, l5Size, lambda_val, inputs = args
    print "Fine Tuning Starting..."
    stepSize = 14702
    for i in range(int(shape(inputs)[1] / stepSize)):
        print "Batch:", i
        data = inputs[:, i * stepSize:(i + 1) * stepSize]
        args = (num_input, num_hidden1, num_hidden2, num_hidden3, num_hidden4, num_hidden5, lambda_val, data)
        opttheta = optimize.fmin_l_bfgs_b(fine_tuning_cost_gpu, weights, fprime=fine_tuning_grad_gpu, args=args, maxiter=400)
        weights = opttheta[0]
        del opttheta
        gpu.free_reuse_cache()
    weights1 = reshape(weights[0:num_weights1], (l1Size, inputSize + 1))
    weights2 = reshape(weights[num_weights1:num_weights1 + num_weights2], (l2Size, l1Size + 1))
    weights3 = reshape(weights[num_weights1 + num_weights2:num_weights1 + num_weights2 + num_weights3], (l3Size, l2Size + 1))
    weights4 = reshape(weights[num_weights1 + num_weights2 + num_weights3:num_weights1 + num_weights2 + num_weights3 + num_weights4], (num_hidden4, num_hidden3 + 1))
    weights5 = reshape(weights[num_weights1 + num_weights2 + num_weights3 + num_weights4:num_weights1 + num_weights2 + num_weights3 + num_weights4 + num_weights5], (num_hidden5, num_hidden4 + 1))
    weights6 = reshape(weights[num_weights1 + num_weights2 + num_weights3 + num_weights4 + num_weights5:shape(weights)[0]], (inputSize, num_hidden5 + 1))
    scipy.io.savemat('MINST_FineTuned_features.mat',
                     mdict={'learntFeaturesL1': weights1, 'learntFeaturesL2': weights2,
                            'learntFeaturesL3': weights3, 'learntFeaturesL4': weights4,
                            'learntFeaturesL5': weights5, 'learntFeaturesL6': weights6})
    trainData = scipy.io.loadmat('trainData.mat')['trainData']
    train_weights1 = reshape(weights1, num_weights1)
    train_weights2 = reshape(weights2, num_weights2)
    train_weights3 = reshape(weights3, num_weights3)
    train_weights4 = reshape(weights4, num_weights4)
    train_weights5 = reshape(weights5, num_weights5)
    train_weights6 = reshape(weights6, num_weights6)
    train_weights = hstack((train_weights1, train_weights2, train_weights3, train_weights4, train_weights5, train_weights6))
    args = (num_input, num_hidden1, num_hidden2, num_hidden3, num_hidden4, num_hidden5, lambda_val, trainData)
    opttheta = optimize.fmin_l_bfgs_b(fine_tuning_cost_gpu, train_weights, fprime=fine_tuning_grad_gpu, args=args, maxiter=400)
    train_weights = opttheta[0]
    del opttheta
    gpu.free_reuse_cache()
    train_weights1 = reshape(train_weights[0:num_weights1], (l1Size, inputSize + 1))
    train_weights2 = reshape(train_weights[num_weights1:num_weights1 + num_weights2], (l2Size, l1Size + 1))
    train_weights3 = reshape(train_weights[num_weights1 + num_weights2:num_weights1 + num_weights2 + num_weights3], (l3Size, l2Size + 1))
    train_weights4 = reshape(train_weights[num_weights1 + num_weights2 + num_weights3:num_weights1 + num_weights2 + num_weights3 + num_weights4], (num_hidden4, num_hidden3 + 1))
    train_weights5 = reshape(train_weights[num_weights1 + num_weights2 + num_weights3 + num_weights4:num_weights1 + num_weights2 + num_weights3 + num_weights4 + num_weights5], (num_hidden5, num_hidden4 + 1))
    train_weights6 = reshape(train_weights[num_weights1 + num_weights2 + num_weights3 + num_weights4 + num_weights5:shape(train_weights)[0]], (inputSize, num_hidden5 + 1))
    testData = scipy.io.loadmat('testData.mat')['testData']
    nData = shape(testData)[1]
    x = concatenate((ones((1, nData)), testData), axis=0)
    hidden1_sum = dot(train_weights1, x)
    hidden1_activation = 1 / (1 + exp(-hidden1_sum))
    hidden1_activation = concatenate((ones((1, nData)), hidden1_activation), axis=0)
    hidden2_sum = dot(train_weights2, hidden1_activation)
    hidden2_activation = 1 / (1 + exp(-hidden2_sum))
    hidden2_activation = concatenate((ones((1, nData)), hidden2_activation), axis=0)
    hidden3_sum = dot(train_weights3, hidden2_activation)
    hidden3_activation = 1 / (1 + exp(-hidden3_sum))
    hidden3_activation = concatenate((ones((1, nData)), hidden3_activation), axis=0)
    hidden4_sum = dot(train_weights4, hidden3_activation)
    hidden4_activation = 1 / (1 + exp(-hidden4_sum))
    hidden4_activation = concatenate((ones((1, nData)), hidden4_activation), axis=0)
    hidden5_sum = dot(train_weights5, hidden4_activation)
    hidden5_activation = 1 / (1 + exp(-hidden5_sum))
    hidden5_activation = concatenate((ones((1, nData)), hidden5_activation), axis=0)
    output_sum = dot(train_weights6, hidden5_activation)
    outputs = 1 / (1 + exp(-output_sum))
    return outputs

def multilayer_feature_learning(data, inputSize, l1Size, l2Size, l3Size, sparsityParam, lambda_val, beta): print "Now starting feature abstraction..." num_input = inputSize num_hidden_L1 = l1Size num_hidden_L2 = l2Size num_hidden_L3 = l3Size num_output_L1 = inputSize num_output_L2 = num_hidden_L1 num_output_L3 = num_hidden_L2 sparsityParam = sparsityParam lambda_val = lambda_val beta = beta inputs = gpu.garray(data) r = gpu.sqrt(6)/gpu.sqrt(num_hidden_L1+num_input+1) weights1_L1 = (gpu.rand(num_hidden_L1,num_input+1))*2*r-r weights2_L1 = (gpu.rand(num_output_L1,num_hidden_L1+1))*2*r-r num_weights1_L1 = (num_input+1)*num_hidden_L1 num_weights2_L1 = (num_hidden_L1+1)*num_output_L1 #weights1_L1 = reshape(weights1_L1, num_weights1_L1) weights1_L1 = weights1_L1.reshape(num_weights1_L1) #weights2_L1 = reshape(weights2_L1, num_weights2_L1) weights2_L1 = weights2_L1.reshape(num_weights2_L1) weights_L1 = hstack((weights1_L1.as_numpy_array(),weights2_L1.as_numpy_array())) print "Level 1 Abstraction Starting...." args = (num_input, num_hidden_L1, num_output_L1, inputs, lambda_val, sparsityParam, beta) opttheta_L1 = optimize.fmin_l_bfgs_b(costfunc_gpu, weights_L1, fprime=grad_costfunc_gpu, args=args, maxiter=400) weights_L1 = gpu.garray(opttheta_L1[0]) #weights1_L1 = reshape(weights_L1[0:num_weights1_L1],(num_hidden_L1,num_input+1)) weights1_L1 = weights_L1[0:num_weights1_L1].reshape((num_hidden_L1,num_input+1)) #weights2_L1 = reshape(weights_L1[num_weights1_L1:shape(weights_L1)[0]],(num_hidden_L2,num_hidden_L1+1)) weights2_L1 = weights_L1[num_weights1_L1:shape(weights_L1)[0]].reshape((num_output_L1,num_hidden_L1+1)) scipy.io.savemat('MINSTLevel1.mat', mdict={'learntFeaturesL1_1': weights1_L1.as_numpy_array(), 'learntFeaturesL1_2': weights2_L1.as_numpy_array()}) L1_activation = feedforward(weights1_L1, inputs) del weights_L1 del weights1_L1 del weights2_L1 gpu.free_reuse_cache() v = gpu.sqrt(6)/gpu.sqrt(num_hidden_L2+num_hidden_L1+1) weights1_L2 = (gpu.rand(num_hidden_L2,num_hidden_L1+1))*2*v-v weights2_L2 = (gpu.rand(num_output_L2,num_hidden_L2+1))*2*v-v num_weights1_L2 = (num_hidden_L1+1)*num_hidden_L2 num_weights2_L2 = (num_hidden_L2+1)*num_output_L2 #weights1_L2 = reshape(weights1_L2, num_weights1_L2) weights1_L2 = weights1_L2.reshape(num_weights1_L2) #weights2_L2 = reshape(weights2_L2, num_weights2_L2) weights2_L2 = weights2_L2.reshape(num_weights2_L2) weights_L2 = hstack((weights1_L2.as_numpy_array(),weights2_L2.as_numpy_array())) args = (num_hidden_L1, num_hidden_L2, num_output_L2, L1_activation, lambda_val, sparsityParam, beta) print "Level 2 Abstraction Starting...." 
    opttheta_L2 = optimize.fmin_l_bfgs_b(costfunc_gpu, weights_L2, fprime=grad_costfunc_gpu, args=args, maxiter=400)
    weights_L2 = gpu.garray(opttheta_L2[0])
    weights1_L2 = weights_L2[0:num_weights1_L2].reshape((num_hidden_L2, num_hidden_L1 + 1))
    weights2_L2 = weights_L2[num_weights1_L2:shape(weights_L2)[0]].reshape((num_output_L2, num_hidden_L2 + 1))
    scipy.io.savemat('MNISTLevel2.mat', mdict={'learntFeaturesL2_1': weights1_L2.as_numpy_array(),
                                               'learntFeaturesL2_2': weights2_L2.as_numpy_array()})
    L2_activation = feedforward(weights1_L2, L1_activation)
    del weights_L2
    del weights1_L2
    del weights2_L2
    gpu.free_reuse_cache()
    # level 3 autoencoder, trained on the level-2 activations
    u = gpu.sqrt(6) / gpu.sqrt(num_hidden_L3 + num_hidden_L2 + 1)
    weights1_L3 = gpu.rand(num_hidden_L3, num_hidden_L2 + 1) * 2 * u - u
    weights2_L3 = gpu.rand(num_output_L3, num_hidden_L3 + 1) * 2 * u - u
    num_weights1_L3 = (num_hidden_L2 + 1) * num_hidden_L3
    num_weights2_L3 = (num_hidden_L3 + 1) * num_output_L3
    weights1_L3 = weights1_L3.reshape(num_weights1_L3)
    weights2_L3 = weights2_L3.reshape(num_weights2_L3)
    weights_L3 = hstack((weights1_L3.as_numpy_array(), weights2_L3.as_numpy_array()))
    args = (num_hidden_L2, num_hidden_L3, num_output_L3, L2_activation, lambda_val, sparsityParam, beta)
    print "Level 3 Abstraction Starting...."
    opttheta_L3 = optimize.fmin_l_bfgs_b(costfunc_gpu, weights_L3, fprime=grad_costfunc_gpu, args=args, maxiter=400)
    weights_L3 = gpu.garray(opttheta_L3[0])
    weights1_L3 = weights_L3[0:num_weights1_L3].reshape((num_hidden_L3, num_hidden_L2 + 1))
    weights2_L3 = weights_L3[num_weights1_L3:shape(weights_L3)[0]].reshape((num_output_L3, num_hidden_L3 + 1))
    scipy.io.savemat('MNISTLevel3.mat', mdict={'learntFeaturesL3_1': weights1_L3.as_numpy_array(),
                                               'learntFeaturesL3_2': weights2_L3.as_numpy_array()})
    L3_activation = feedforward(weights1_L3, L2_activation)
    del weights_L3
    del weights1_L3
    del weights2_L3
    gpu.free_reuse_cache()
    print "Abstraction completed."
    return L3_activation
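# Hypothetical usage sketch: the column-per-example data layout and the
# hyperparameter values below are assumptions, mirroring the other cost
# functions in this file, not values stated by the author.
import numpy as np

data = np.random.rand(784, 10000)   # e.g. flattened 28x28 images, one per column
features = multilayer_feature_learning(
    data, inputSize=784,
    l1Size=500, l2Size=250, l3Size=100,
    sparsityParam=0.035, lambda_val=3e-4, beta=5)
print features.shape                # expected: (100, 10000) level-3 activations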
def _classify(path, name, frames, channels, target, choices, CellObject):
    gnp.free_reuse_cache()
    # GPU TO USE, WE HAVE 2, I PREFER IF YOU'RE USING GPU 0
    # whole images take up a lot of memory so we need to coordinate this.
    # if you're not using the notebook or a script, make sure to shut down or restart the notebook
    # you can use nvidia-smi in a terminal to see what processes are running on the GPU
    gnp._useGPUid = 0
    # protein localization categories
    localizationTerms = ['ACTIN', 'BUDNECK', 'BUDTIP', 'CELLPERIPHERY', 'CYTOPLASM',
                         'ENDOSOME', 'ER', 'GOLGI', 'MITOCHONDRIA', 'NUCLEARPERIPHERY',
                         'NUCLEI', 'NUCLEOLUS', 'PEROXISOME', 'SPINDLE', 'SPINDLEPOLE',
                         'VACUOLARMEMBRANE', 'VACUOLE']
    # normalization values (don't need to change)
    norm_vals = np.load('/home/morphology/mpg4/OrenKraus/Data_Sets/Yeast_Protein_Localization/Yolanda_Chong/overal_mean_std_for_single_cell_crops_based_on_Huh.npz')
    gnp.track_memory_usage = True
    # may change to a better model (constantly training better networks)
    model_path = '/home/okraus/mil_models_backup/mil_models/Yeast_Protein_Localization/Yeast_NAND_a_10_scratch_Dropout_v5_MAP_early_stopping_best_model.npz'
    # load model and set evaluation type (MIL convolves across whole images)
    # change size
    curImages, sizes = getImageData(path, frames, channels)
    curImages = normalize_by_constant_values(curImages, norm_vals['means'], norm_vals['stdevs'])
    sizeX = sizes[1]
    sizeY = sizes[0]
    nn = modelEvalFunctions.loadResizedModel(model_path, sizeY, sizeX)
    model = modelEvalFunctions.evaluateModel_MIL(nn, localizationTerms, outputLayer='loc')
    nn.ForwardProp({'X0': gnp.garray(curImages)})
    # GET RATIOS OF CLASSES
    # values of the prediction maps above
    pred_maps = nn._layers['MIL_pool'].Z[target - 1].as_numpy_array()
    # calculate relative activation of each map
    area = pred_maps.sum(1).sum(1) / pred_maps.sum()
    # calculate absolute area of each map (optional)
    area2 = pred_maps.sum(1).sum(1) / (pred_maps.shape[1] * pred_maps.shape[2])
    # plot relative activations per class, use area or area2
    area_lib = {}
    jacobian = getJacobian(nn, frames)
    plt.imshow(jacobian[target - 1, 0])
    loc = str(settings.MEDIA_ROOT + '/classes/' + name.split('.')[0] + "_FULL0")
    save(loc)
    mahotas_segmentation = mahotas_clean_up_seg(jacobian, target - 1)
    plt.imshow(mahotas_segmentation)
    loc = str(settings.MEDIA_ROOT + '/classes/' + name.split('.')[0] + "_FULL1")
    save(loc)
    show_segmentation_boundaries(curImages, mahotas_segmentation, target - 1, sizeX, sizeY)
    loc = str(settings.MEDIA_ROOT + '/classes/' + name.split('.')[0] + "_FULL2")
    save(loc)
    top5indices = np.argsort(area)[::-1][:5]
    del jacobian
    del mahotas_segmentation
    for i in range(len(localizationTerms)):
        if i in top5indices:
            area_lib[localizationTerms[i]] = area[i]
            jacobian_per_class = getJacobian_per_class(nn, i, frames)
            im2show = mahotas_clean_up_seg(jacobian_per_class, target - 1)
            overlay(curImages, im2show, target - 1, sizeX, sizeY)
            loc = str(settings.MEDIA_ROOT + '/classes/' + name.split('.')[0] + "_" + localizationTerms[i])
            save(loc)
            np.save(loc, im2show)
            continue
        if localizationTerms[i] not in choices:
            continue
        area_lib[localizationTerms[i]] = area[i]
        jacobian_per_class = getJacobian_per_class(nn, i, frames)[target - 1]
        im2show = np.int8(np.log(1 + jacobian_per_class[0]) > 0.1 + np.int8(np.log(1 + jacobian_per_class[1]) > 1)) > 0
        im2show = mh.dilate(mh.dilate(mh.dilate(mh.erode(mh.erode(mh.erode(im2show > 0))))))
        overlay(curImages, im2show, target - 1, sizeX, sizeY)
        loc = str(settings.MEDIA_ROOT + '/classes/' + name.split('.')[0] + "_" + localizationTerms[i])
        save(loc)
        np.save(loc, im2show)
    del nn
    del model
    gnp.free_reuse_cache()
    f = [['Class', 'Area']]
    for key in area_lib:
        f.append([str(key), area_lib[key]])
    CellObject.activations = f
    CellObject.save()
    from openpyxl import Workbook
    wb = Workbook()
    ws = wb.active
    for arr in f:
        ws.append(arr)
    wb.save(settings.MEDIA_ROOT + '/classes/' + name.split('.')[0] + '.xlsx')
    if CellObject.email != '':
        send_mail('Deep Cell Vision',
                  'Your image has been classified. Go to http://deepcellvision.com/results/' + CellObject.name + ' to see your results',
                  '*****@*****.**', [CellObject.email], fail_silently=False)
    return
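# A standalone numpy sketch (assumed shapes, not the author's pipeline) of the
# two "area" statistics computed in _classify: pred_maps is
# (num_classes, H, W), so summing over the spatial axes gives one activation
# total per class.
import numpy as np

pred_maps = np.random.rand(17, 32, 32)             # 17 localization classes, hypothetical 32x32 maps
area = pred_maps.sum(1).sum(1) / pred_maps.sum()   # relative: fractions summing to 1
area2 = pred_maps.sum(1).sum(1) / (pred_maps.shape[1] * pred_maps.shape[2])  # absolute: mean activation per pixel
top5 = np.argsort(area)[::-1][:5]                  # indices of the five strongest classes
print top5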
#filename = './data/sixteenth_note_bass_drum_patterns.mid'
filename = './data/blast_beat.mid'
start_beat = 1
offset = 8 * 16
T = max(model.Tv, model.Th)
seed_data = data_helper.get_seed_pattern(filename, model, offset)
sequence = model.generate(seed_data, num_steps, K, start_beat, noisy).reshape((-1, model.Nv)).T / model.vis_scale
##sequence = drum_matrix
beats = midi_tools.label_drum_matrix(sequence.shape[1], period=16, offset=0)
#quarters = plot_vstack_beat(sequence, beats)
sequence = sequence.as_numpy_array()
pl.clf()
pl.imshow(sequence, origin='lower')
pl.show()
ref_midi_file = filename
output_dir = './output/'
tatums_per_beat = 4
output_filename = 'generated_%u.midi' % np.random.randint(100000)
midioutput = midi_tools.drum_matrix_to_midi(sequence, tatums_per_beat, output_dir + output_filename, ref_midi_file)
print output_filename
gp.free_reuse_cache()
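# Illustrative only: a toy drum matrix rendered the same way as `sequence`
# above (rows are drum voices, columns are sixteenth-note tatums). The voice
# names and the pattern are invented for the example.
import numpy as np
import pylab as pl

drum_matrix = np.zeros((3, 16))   # 3 voices x one bar of sixteenths
drum_matrix[0, ::4] = 1           # kick on the quarter notes
drum_matrix[1, 4::8] = 1          # snare on beats 2 and 4
drum_matrix[2, ::2] = 1           # hi-hat eighths

pl.clf()
pl.imshow(drum_matrix, origin='lower', interpolation='nearest', aspect='auto')
pl.yticks([0, 1, 2], ['kick', 'snare', 'hat'])
pl.xlabel('tatum (sixteenth note)')
pl.show()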
def backprop_gradient(self, v, network, X, targets, weights):
    '''
    Calculates the value of the cost function and the gradient for CG
    optimization.

    args:
        array v:           the 1d vector of weights
        list[obj] network: the network
        array X:           training data
        array targets:     the training targets
        array weights:     the backprop weights

    returns:
        array cost: the value of the cost function
        array grad: the value of the gradient

    This function is called by scipy's minimize function during optimization.
    '''
    if len(v.shape) == 1:
        v = v.reshape((v.shape[0], 1))
    # initialize variables
    n = X.shape[0]
    numHiddenLayers = len(network)
    # put the v weights back into the network
    ind = 0
    for i in range(numHiddenLayers):
        h, w = network[i].W.shape
        network[i].W = gp.garray((v[ind:(ind + h * w)]).reshape((h, w)))
        ind += h * w
        b = network[i].hbias.shape[0]
        network[i].hbias = gp.garray(v[ind:(ind + b)]).reshape((b, 1))
        ind += b
    # run data through the network, keeping the activations of each layer
    acts = [X]  # a list of layer activations, input first
    hid = X
    for layer in network:
        vis = gp.garray(hid)
        hid = self.get_activation(layer, vis)
        acts.append(hid)
        gp.free_reuse_cache()
    # store the gradients
    dW = []
    db = []
    # compute the value of the cost function
    if self.targetCost == 'crossEntropy':
        # see www.stanford.edu/group/pdplab/pdphandbook/handbookch6.html
        cost = (-1.0 / n) * np.sum(np.sum(targets * np.log(acts[-1]) +
                (1.0 - targets) * np.log(1.0 - acts[-1]), axis=1) * weights.T)
        Ix = (acts[-1] - targets) / n
    else:  # self.targetCost == 'linSquaredErr'
        cost = 0.5 * np.sum(np.sum(np.square(acts[-1] - targets), axis=1) * weights.T)
        Ix = (acts[-1] - targets)
    Ix *= np.tile(weights, (1, Ix.shape[1])).reshape((Ix.shape[0], Ix.shape[1]))
    Ix = gp.garray(Ix)
    # compute the gradients, walking backwards through the layers
    for i in range(numHiddenLayers - 1, -1, -1):
        # augment activations with ones for the bias unit
        acts[i] = gp.garray(acts[i])
        acts[i] = gp.concatenate((acts[i], gp.ones((n, 1))), axis=1)
        # compute delta in the next layer
        delta = gp.dot(acts[i].T, Ix)
        # split delta into weights and bias parts
        dW.append(delta[:-1, :].T)
        db.append(delta[-1, :].T)
        # backpropagate the error
        if i > 0:
            if network[i - 1].hidtype == 'sigmoid':
                Ix = gp.dot(Ix, gp.concatenate((network[i].W, network[i].hbias), axis=1)) * acts[i] * (1.0 - acts[i])
            elif network[i - 1].hidtype == 'gaussian':
                Ix = gp.dot(Ix, gp.concatenate((network[i].W, network[i].hbias), axis=1))
            Ix = Ix[:, :-1]
        gp.free_reuse_cache()
    dW.reverse()
    db.reverse()
    # convert gradient information back into a flat vector matching v
    grad = np.zeros_like(v)
    ind = 0
    for i in range(numHiddenLayers):
        grad[ind:(ind + dW[i].size)] = \
            (dW[i].reshape((dW[i].shape[0] * dW[i].shape[1], 1))).as_numpy_array()
        ind += dW[i].size
        grad[ind:(ind + db[i].size), 0] = db[i].as_numpy_array()
        ind += db[i].size
    grad = grad.reshape((grad.shape[0],))
    return cost, grad
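# Hedged usage sketch (not from the original source): because
# backprop_gradient returns (cost, grad), scipy.optimize.minimize can consume
# it directly with jac=True, as the docstring suggests. `dbn` (an instance of
# this class), `net`, `X`, `targets`, and `weights` are hypothetical
# placeholders for whatever the class was built with.
import numpy as np
from scipy.optimize import minimize

# flatten the current network parameters in the same W-then-hbias order
v0 = np.concatenate(
    [np.concatenate([layer.W.as_numpy_array().ravel(),
                     layer.hbias.as_numpy_array().ravel()])
     for layer in net])
result = minimize(dbn.backprop_gradient, v0,
                  args=(net, X, targets, weights),
                  method='CG', jac=True, options={'maxiter': 100})
v_opt = result.x  # optimized flat weight vector, same layout as v0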
def train(self):
    config = self.config
    # convert t into a matrix in 1-of-K representation if it is a vector
    t = self.train_data.T
    T_matrix = self.output.act_type.label_vec_to_mat(t, self.train_data.K)
    layer_config = LayerConfig()
    layer_config.learn_rate = config.learn_rate
    layer_config.momentum = config.init_momentum
    layer_config.weight_decay = config.weight_decay
    nnstore = NNStore()
    nnstore.init_from_net(self)
    best_net = NNStore()
    best_net.init_from_net(self)
    train_acc, val_acc, test_acc = self.display_training_info(
        -1, self._compute_loss(self.train_data.X, T_matrix, config.minibatch_size), 0)
    acc_rec = np.zeros((config.num_epochs / config.epoch_to_display + 1, 4))
    acc_rec[0, 0] = 0
    acc_rec[0, 1] = train_acc
    if config.is_val:
        acc_rec[0, 2] = val_acc
    if config.is_test:
        acc_rec[0, 3] = test_acc
    t_start = time.time()
    best_acc = val_acc
    if self.config.is_test:
        best_test_acc = test_acc
    best_epoch = -1
    for epoch in range(0, config.num_epochs):
        gnp.free_reuse_cache()
        # decrease the learning rate over time
        layer_config.learn_rate = config.learn_rate / (epoch / config.lr_drop_rate + 1)
        # TODO [dirty] special case for Lnsvm
        if isinstance(self.output.act_type, act.LnsvmVariantOutput):
            #self.output.act_type.n = 3.0 - (3.0 - 0.5) / 50 * epoch
            self.output.act_type.n = 0.5
            if self.output.act_type.n < 0.5:
                self.output.act_type.n = 0.5
            if (epoch + 1) % config.epoch_to_display == 0:
                print 'n %.4f' % self.output.act_type.n,
        if epoch >= config.switch_epoch:
            layer_config.momentum = config.final_momentum
        # shuffle the dataset
        idx = np.random.permutation(self.num_total_cases)
        #idx = np.arange(self.num_total_cases)
        train_X = self.train_data.X[idx]
        train_T = T_matrix[idx]
        if config.input_noise > 0:
            train_X = train_X * (gnp.rand(train_X.shape) > config.input_noise)
            #train_X = train_X + gnp.randn(train_X.shape) * config.input_noise
        loss = 0
        for batch in range(0, self.num_minibatches):
            i_start = batch * config.minibatch_size
            if not batch == self.num_minibatches - 1:
                i_end = i_start + config.minibatch_size
            else:
                i_end = self.num_total_cases
            X = train_X[i_start:i_end]
            T = train_T[i_start:i_end]
            # forward pass
            self._forward(X)
            # compute loss
            loss += self.output.loss(T)
            if self.output.Y.isnan().any():
                import ipdb
                ipdb.set_trace()
                print 'batch #%d <-- nan' % batch
            # backprop
            dLdXabove = self.output.backprop(layer_config)
            for i in range(self.num_layers - 1, -1, -1):
                dLdXabove = self.layer[i].backprop(dLdXabove, layer_config)
        # statistics
        avg_loss = 1.0 * loss / self.num_total_cases
        if (epoch + 1) % config.epoch_to_display == 0:
            train_acc, val_acc, test_acc = self.display_training_info(
                epoch, avg_loss, time.time() - t_start)
            if val_acc is None:
                val_acc = train_acc
            if (config.show_task_loss and val_acc < best_acc) or \
                    (not config.show_task_loss and val_acc > best_acc):
                best_acc = val_acc
                best_net.update_from_net(self)
                if config.is_test:
                    best_test_acc = test_acc
                best_epoch = epoch
            t_start = time.time()
            acc_rec[(epoch + 1) / config.epoch_to_display, 0] = epoch + 1
            acc_rec[(epoch + 1) / config.epoch_to_display, 1] = train_acc
            if config.is_val:
                acc_rec[(epoch + 1) / config.epoch_to_display, 2] = val_acc
            if config.is_test:
                acc_rec[(epoch + 1) / config.epoch_to_display, 3] = test_acc
        if (epoch + 1) % config.epoch_to_save == 0:
            nnstore.update_from_net(self)
            nnstore.write(config.output_dir + '/m' + str(epoch + 1) + '.pdata')
    print '----------------------------------------------------------------'
    if config.show_task_loss:
        s = 'loss'
    else:
        s = 'acc'
    if config.is_val:
        print 'Best val_%s %.4f' % (s, best_acc),
    else:
        print 'Best train_%s %.4f' % (s, best_acc),
    if config.is_test:
        print '--> test_%s %.4f' % (s, best_test_acc),
    print 'at epoch %d' % (best_epoch + 1)
    if config.is_output:
        f = open('%s/acc_rec.pdata' % config.output_dir, 'w')
        pickle.dump(acc_rec, f, -1)
        f.close()
        self.write_config('%s/cfg.txt' % config.output_dir)
        # save the best net
        fname = config.output_dir + '/best_net.pdata'
        print 'Saving the best model to ' + fname
        best_net.write(fname)
    if config.is_test:
        return (best_acc, best_test_acc)
    else:
        return best_acc
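# A small companion sketch (not in the original source) for reading back the
# training curve saved above; acc_rec rows are [epoch, train, val, test], and
# the directory name is a placeholder.
import pickle

with open('output_dir/acc_rec.pdata', 'rb') as f:
    acc_rec = pickle.load(f)
print 'epochs recorded: %d' % acc_rec.shape[0]
print 'final train acc: %.4f' % acc_rec[-1, 1]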
def mlpSoftmax_grad(x, *args):
    numClasses, inputSize, l1Size, l2Size, lambda_softmax, lambda_hidden, inputs, labels, groundTruth = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_L2 = l2Size * (l1Size + 1)
    num_weights_softmax = numClasses * l2Size
    inputs = gpu.garray(inputs)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1], (l1Size, inputSize + 1)))
    theta_L2 = gpu.garray(reshape(x[num_weights_L1:num_weights_L2 + num_weights_L1], (l2Size, l1Size + 1)))
    theta_softmax = gpu.garray(reshape(x[num_weights_L2 + num_weights_L1:shape(x)[0]], (numClasses, l2Size)))
    theta_L1_grad = gpu.zeros(shape(theta_L1))
    theta_L2_grad = gpu.zeros(shape(theta_L2))
    inputs = gpu.concatenate((gpu.ones((1, numCases)), inputs), axis=0)
    # forward pass
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_activation_L1 = gpu.concatenate((gpu.ones((1, numCases)), hidden_activation_L1), axis=0)
    hidden_sum_L2 = gpu.dot(theta_L2, hidden_activation_L1)
    hidden_activation_L2 = hidden_sum_L2.logistic()
    hidden_sum_softmax_imd = gpu.dot(theta_softmax, hidden_activation_L2)
    # subtract the per-column max before exponentiating, for numerical stability
    hidden_sum_softmax = hidden_sum_softmax_imd - hidden_sum_softmax_imd.max(axis=0)
    predictions = hidden_sum_softmax.exp()
    predictions = predictions / gpu.sum(predictions, axis=0)
    # backward pass
    softmax_imd = groundTruth - predictions
    theta_softmax_grad = -1 * gpu.dot(softmax_imd, gpu.garray(transpose(hidden_activation_L2.as_numpy_array()))) / numCases + lambda_softmax * theta_softmax
    deltaOut = -softmax_imd
    delta_L2_imd = gpu.dot(gpu.garray(transpose(theta_softmax.as_numpy_array())), deltaOut)
    delta_L2_imd2 = (delta_L2_imd * hidden_activation_L2) * (1 - hidden_activation_L2)
    delta_L2 = gpu.dot(delta_L2_imd2, gpu.garray(transpose(hidden_activation_L1.as_numpy_array())))
    theta_L2_grad += delta_L2
    delta_L1_imd = gpu.dot(gpu.garray(transpose(theta_L2.as_numpy_array())), delta_L2_imd2)
    delta_L1_imd2 = (delta_L1_imd * hidden_activation_L1) * (1 - hidden_activation_L1)
    delta_L1 = gpu.dot(delta_L1_imd2, gpu.garray(transpose(inputs.as_numpy_array())))
    # drop the bias row of delta_L1; theta_L1 has no incoming bias unit
    theta_L1_grad += delta_L1[1:shape(theta_L1)[0] + 1, :]
    theta_L1_grad = theta_L1_grad / numCases
    theta_L2_grad = theta_L2_grad / numCases
    # add the weight-decay term, skipping the bias columns
    theta_L1_grad[:, 1:shape(theta_L1_grad)[1]] = theta_L1_grad[:, 1:shape(theta_L1_grad)[1]] + theta_L1[:, 1:shape(theta_L1)[1]] * lambda_hidden
    theta_L2_grad[:, 1:shape(theta_L2_grad)[1]] = theta_L2_grad[:, 1:shape(theta_L2_grad)[1]] + theta_L2[:, 1:shape(theta_L2)[1]] * lambda_hidden
    theta_L1_grad = reshape(theta_L1_grad.as_numpy_array(), num_weights_L1)
    theta_L2_grad = reshape(theta_L2_grad.as_numpy_array(), num_weights_L2)
    theta_softmax_grad = reshape(theta_softmax_grad.as_numpy_array(), num_weights_softmax)
    # free GPU memory before returning
    del inputs
    del theta_L1
    del theta_L2
    del theta_softmax
    del hidden_sum_L1
    del hidden_activation_L1
    del hidden_sum_L2
    del hidden_activation_L2
    del hidden_sum_softmax
    del predictions
    del softmax_imd
    del deltaOut
    del delta_L2_imd
    del delta_L2_imd2
    del delta_L2
    del delta_L1_imd
    del delta_L1_imd2
    del delta_L1
    gpu.free_reuse_cache()
    return hstack((theta_L1_grad, theta_L2_grad, theta_softmax_grad))
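# A standalone numpy illustration (not part of the original file) of the
# max-subtraction trick used above: shifting each column of logits by its max
# leaves the softmax unchanged but keeps exp() from overflowing.
import numpy as np

z = np.array([[1000.0, 10.0], [1001.0, 20.0]])    # logits, one column per case
naive = np.exp(z) / np.exp(z).sum(axis=0)         # overflows: column 0 becomes nan
shifted = z - z.max(axis=0)
stable = np.exp(shifted) / np.exp(shifted).sum(axis=0)
print stable                                      # column 0 -> approx. [0.269, 0.731]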
def kmeans(X, K, init='plus', dist='euclidean', empty_action='singleton', max_iters=100, verbose=True):
    """
    X: NxD dataset, each row is one data point.
    init: method to choose initial cluster centers. Available options:
        'plus': k-means++
        'sample': randomly sample K data points
        'random': generate K points uniformly at random from X's range
    dist: distance metric to be used. Available options:
        'euclidean': Euclidean distance.
    empty_action: action to take when one cluster loses all its members.
        Available options:
        'singleton': create a new cluster to replace it, using the point
            furthest from the current center.
        'error': raise an exception.
    max_iters: maximum number of iterations to run.
    verbose: if False, nothing will be printed during training.

    Return:
    C: KxD matrix, cluster centers, each row is one center.
    idx: N-d vector, cluster assignments for each data point.
    loss: sum of distances for the dataset under the given distance metric.
    """
    t_start = time.time()
    gnp.free_reuse_cache()
    gnp.max_memory_usage = 3.8 * 1000 * 1000 * 1000

    def f_print(s, newline=True):
        if verbose:
            if newline:
                print s
            else:
                print s,

    f_print('Initializing k-means...', newline=False)
    X = gnp.as_garray(X)
    X_cpu = X.asarray().astype(np.float64)
    if isinstance(init, str):
        f_init = choose_initializer(init)
        C = f_init(X, K, dist=dist)
    elif isinstance(init, gnp.garray) or isinstance(init, np.ndarray):
        C = gnp.as_garray(init)
        print '[Warning] Init centers provided, K and init not used.'
        K = C.shape[0]
    f_dist = choose_distance_metric(dist)
    loss = 0
    idx = None
    prev_idx = None
    full_idx = np.arange(X.shape[0])
    f_print('done [%.2fs]' % (time.time() - t_start))
    t_start = time.time()
    i_iter = 0
    while i_iter <= max_iters:
        gnp.free_reuse_cache()
        f_print('iter %d,' % i_iter, newline=False)
        # use the GPU to compute distances because it is fast, but go back to
        # the CPU to avoid low-precision problems
        D = f_dist(X, C).asarray().astype(np.float64)
        idx = D.argmin(axis=1)
        loss = D[full_idx, idx].sum()
        if prev_idx is not None and (idx == prev_idx).all():
            print '** k-means converged **'
            break
        else:
            prev_idx = idx
        # update cluster centers
        do_restart = False
        for k in xrange(K):
            k_idx = full_idx[idx == k]
            if k_idx.size == 0:
                if empty_action == 'singleton':
                    # replace the empty cluster with the point furthest from its center
                    C[k] = X[f_dist(X, C[k:k+1]).ravel().argmax()]
                    do_restart = True
                elif empty_action == 'error':
                    raise Exception('Empty cluster encountered in k-means!')
                else:
                    raise Exception('Action not specified for empty cluster.')
            else:
                C[k] = X_cpu[k_idx].mean(axis=0)
        f_print('loss=%.2f, [%.2fs]' % (loss, time.time() - t_start))
        if do_restart:
            print '[Warning] restarting because empty clusters were encountered.'
            i_iter = 0
            t_start = time.time()
        i_iter += 1
    return C, idx, loss
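# Hypothetical quick test of kmeans on synthetic data; the two Gaussian blobs
# and K=2 are invented for illustration.
import numpy as np

np.random.seed(0)
blob1 = np.random.randn(100, 2) + np.array([5.0, 5.0])
blob2 = np.random.randn(100, 2) - np.array([5.0, 5.0])
X = np.vstack((blob1, blob2))   # 200x2, rows are points

C, idx, loss = kmeans(X, K=2, init='plus', max_iters=50, verbose=False)
print C      # two centers, near (5, 5) and (-5, -5)
print loss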
def garbage_collect():
    """Free gnumpy's reuse cache (if gnumpy was loaded) and run the Python GC."""
    global _gnumpy_loaded
    if _gnumpy_loaded:
        gp.free_reuse_cache(True)
    gc.collect()
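# A hedged sketch of the guarded-import pattern implied by the _gnumpy_loaded
# flag above; this module layout is an assumption, not the author's code.
import gc

try:
    import gnumpy as gp
    _gnumpy_loaded = True
except ImportError:
    gp = None
    _gnumpy_loaded = False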