def main():
    x = num.random.randn(5, 64)
    x = x.astype("float32")
    xGPU = cm.CUDAMatrix(reformat(x))
    r = cpuSoftmax(x)
    print r.dtype

    tempCol = cm.CUDAMatrix(reformat(num.zeros((xGPU.shape[0], 1))))
    tempRow = cm.CUDAMatrix(reformat(num.zeros((1, xGPU.shape[1]))))
    #singleSoftmax(xGPU, tempCol, tempRow)
    singleSoftmax(xGPU, tempRow)
    xGPU.copy_to_host()
    diff = xGPU.numpy_array - r
    print num.sum(num.abs(diff))
    #testMaskedSM()

    col = cm.CUDAMatrix(reformat(num.random.rand(5, 1)))
    print col.shape
    col.copy_to_host()
    print col.numpy_array
    col.reshape((1, 5))
    print col.shape
    col.copy_to_host()
    print col.numpy_array

    garb = cm.CUDAMatrix(reformat(num.zeros((5, 5))))
    garb.set_row_slice(2, 3, col)
    garb.copy_to_host()
    print garb.numpy_array
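# `cpuSoftmax` and `singleSoftmax` are used above but defined elsewhere.
# A minimal NumPy reference for cpuSoftmax, assuming (as the calls above
# suggest) a columnwise softmax over float32 data; a sketch, not the
# original helper:
def cpuSoftmax(x):
    # subtract the columnwise max for numerical stability
    e = num.exp(x - x.max(axis=0))
    return (e / e.sum(axis=0)).astype("float32")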
def trainLowMemory(self, data, index, numEpochs, reportMB=False):
    assert data.dtype == num.dtype('float32')
    numcases = len(index)
    num_mini_batches = numcases / self.mbsz
    indexPerm = num.random.permutation(range(numcases))
    noise = cm.CUDAMatrix(reformat(num.zeros((self.numVis, self.mbsz))))
    for ep in range(numEpochs):
        recErr = 0
        for mb in range(num_mini_batches):
            mbIndex = index[indexPerm[mb * self.mbsz:(mb + 1) * self.mbsz]]
            curInputsMB_CPU = data[:, mbIndex]
            curPastMB_CPU = [data[:, mbIndex - i - 1] for i in range(self.numPrev)]
            if self.pastNoiseSM > 0:
                for i in range(self.numPrev):
                    smNoise = (self.pastNoiseSM / self.smsz) * num.random.rand(self.smsz, self.mbsz)
                    #smNoise[0,:] = 0
                    #smNoise /= self.smsz-1
                    curPastMB_CPU[i][:self.smsz, :] = \
                        (curPastMB_CPU[i][:self.smsz, :] + smNoise) / (1 + self.pastNoiseSM)
            curInputsMB = cm.CUDAMatrix(reformat(curInputsMB_CPU))
            curPastMB = [cm.CUDAMatrix(reformat(p)) for p in curPastMB_CPU]
            if self.pastNoise > 0:
                for i in range(self.numPrev):
                    noise.fill_with_randn()
                    noise.mult(self.gaussMask)
                    curPastMB[i].add_mult(noise, self.pastNoise)
            self.step(curInputsMB, curPastMB)
            recErr += self.curRecErr()
            if reportMB:
                yield (mb, num_mini_batches)
        yield recErr
def copy_reordered_column_vectors_to_reordered_columns(self, orderIn, orderOut, start, end, target):
    """
    Copies columns from self into target. The source columns are copied
    in the order specified by indices=orderIn[start .. end]. The target
    column order is similarly specified by indices=orderOut[start .. end].
    DJ April 19. 2011.
    """
    if not target:
        raise CUDAMatException("target not specified. target cannot be null")
    if isinstance(orderIn, CUDAMatrix):
        orderInMat = orderIn
    else:
        orderInMat = cudamat.CUDAMatrix(cudamat.reformat(orderIn))
    if isinstance(orderOut, CUDAMatrix):
        orderOutMat = orderOut
    else:
        orderOutMat = cudamat.CUDAMatrix(cudamat.reformat(orderOut))
    err_code = _cudamat_ext.copy_reordered_column_vectors_to_reordered_columns(
        self.p_mat, orderInMat.p_mat,
        target.p_mat, orderOutMat.p_mat,
        ct.c_int(start), ct.c_int(end))
    if err_code:
        raise generate_exception(err_code)
    return self
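# A usage sketch for the reordered copy above (hypothetical shapes and
# index vectors; assumes an initialized cudamat context):
#   src = cudamat.CUDAMatrix(cudamat.reformat(np.random.rand(8, 4)))
#   dst = cudamat.CUDAMatrix(cudamat.reformat(np.zeros((8, 4))))
#   orderIn = np.array([[2, 0, 1, 3]])    # source column order
#   orderOut = np.array([[0, 1, 2, 3]])   # target column order
#   src.copy_reordered_column_vectors_to_reordered_columns(orderIn, orderOut, 0, 4, dst)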
def rbmHtoV(m, X):
    """Convey data from the hidden layer to the visible layer."""
    cm.cublas_init()

    # copy data to GPU
    data = cm.CUDAMatrix(cm.reformat(X))
    weight = cm.CUDAMatrix(cm.reformat(m.weight))
    biasV = cm.CUDAMatrix(cm.reformat(m.biasV))

    nCase = X.shape[0]
    nVis = biasV.asarray().size
    VisActP = cm.CUDAMatrix(np.zeros((nCase, nVis)))

    if m.type == "BB":
        cm.dot(data, weight.T, target=VisActP)
        VisActP.add_row_vec(biasV)
        VisActP.apply_sigmoid()
    elif m.type == "BG":
        cm.dot(data, weight.T, target=VisActP)
        VisActP.add_row_vec(biasV)
    elif m.type == "GB":
        pass

    result = VisActP.asarray()

    # free device memory
    data.free_device_memory()
    weight.free_device_memory()
    biasV.free_device_memory()
    VisActP.free_device_memory()

    cm.shutdown()

    return result
def trainLowMemory(self, data, index, numEpochs, reportMB=False):
    assert data.dtype == num.dtype('float32')
    numcases = len(index)
    num_mini_batches = numcases / self.mbsz
    indexPerm = num.random.permutation(range(numcases))
    noise = cm.CUDAMatrix(reformat(num.zeros((self.numVis, self.mbsz))))
    noiseThresh = cm.CUDAMatrix(reformat(num.zeros((self.numVis, self.mbsz))))
    noiseThresh.assign_scalar(1.0 - self.pastNoise)
    for ep in range(numEpochs):
        recErr = 0
        for mb in range(num_mini_batches):
            mbIndex = index[indexPerm[mb * self.mbsz:(mb + 1) * self.mbsz]]
            curInputsMB_CPU = data[:, mbIndex]
            curPastMB_CPU = [data[:, mbIndex - i - 1] for i in range(self.numPrev)]
            curInputsMB = cm.CUDAMatrix(reformat(curInputsMB_CPU))
            curPastMB = [cm.CUDAMatrix(reformat(p)) for p in curPastMB_CPU]
            for i in range(self.numPrev):
                if self.pastNoise > 0 and not self.samplePast:
                    noise.fill_with_rand()
                    noise.less_than(noiseThresh, target=noise)
                    curPastMB[i].mult(noise)
                if self.samplePast:
                    noise.fill_with_rand()
                    noise.less_than(curPastMB[i], target=curPastMB[i])
            self.step(curInputsMB, curPastMB)
            recErr += self.curRecErr()
            if reportMB:
                yield (mb, num_mini_batches)
        yield recErr
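# The masking trick above is worth spelling out: fill_with_rand() draws
# uniform [0, 1) samples and less_than(noiseThresh) turns them into a
# 0/1 mask with P(1) = 1 - pastNoise, so mult() zeroes a random
# pastNoise fraction of each past frame. The same pattern standalone,
# with hypothetical names:
#   mask = cm.empty((numVis, mbsz))
#   mask.fill_with_rand()
#   mask.less_than(keep_prob, target=mask)  # 1 with probability keep_prob
#   acts.mult(mask)                         # drop a (1 - keep_prob) fraction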
def reconstructions(self, past, hiddens, onGPU=False):
    """
    We assume we have an integer number of minibatches.
    """
    # we return an array numVis by floor(numcases/mbsz)
    if onGPU:
        pastGPU = past
        hiddensGPU = hiddens
    else:
        pastGPU = [cm.CUDAMatrix(reformat(p)) for p in past]
        hiddensGPU = cm.CUDAMatrix(reformat(hiddens))
    numcases = hiddensGPU.numpy_array.shape[1]
    num_mini_batches = numcases / self.mbsz
    recons = []
    for i in range(num_mini_batches):
        self.past = [p.slice(i * self.mbsz, (i + 1) * self.mbsz) for p in pastGPU]
        self.hActs = hiddensGPU.slice(i * self.mbsz, (i + 1) * self.mbsz)
        self.visActProbs(True)
        self.negVis.copy_to_host()
        recons.append(self.negVis.numpy_array.copy())
    return num.hstack(recons)
def predictions(self, inp, past, sample=False):
    """
    This function assumes inp and past reside on the cpu. It returns a
    numpy array. We assume an integer number of minibatches and any
    cases beyond mbsz*floor(numcases/mbsz) are ignored.
    """
    # we return an array numHid by floor(numcases/mbsz)
    pred = []
    numcases = inp.shape[1]
    num_mini_batches = numcases / self.mbsz
    for i in range(num_mini_batches):
        idx = i * self.mbsz
        self.vis = cm.CUDAMatrix(reformat(inp[:, idx:idx + self.mbsz]))
        self.past = [cm.CUDAMatrix(reformat(p[:, idx:idx + self.mbsz])) for p in past]
        self.hidActProbs()
        if sample:
            self.sampleHiddens(self.hActProbs)
            self.hActs.copy_to_host()
            pred.append(self.hActs.numpy_array.copy())
        else:
            self.hActProbs.copy_to_host()
            pred.append(self.hActProbs.numpy_array.copy())
    return num.hstack(pred)
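# A driver sketch for predictions() (hypothetical data; numcases should
# be a multiple of net.mbsz or the tail is dropped, per the docstring):
#   inp = num.random.rand(net.numVis, 10 * net.mbsz).astype("float32")
#   past = [num.roll(inp, i + 1, axis=1) for i in range(net.numPrev)]
#   hidProbs = net.predictions(inp, past, sample=False)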
def trainLowMemory(self, data, index, numEpochs, reportMB=False):
    assert data.dtype == num.dtype('float32')
    numcases = len(index)
    num_mini_batches = numcases / self.mbsz
    indexPerm = num.random.permutation(range(numcases))
    noise = cm.CUDAMatrix(reformat(num.zeros((self.numVis, self.mbsz))))
    for ep in range(numEpochs):
        recErr = 0
        for mb in range(num_mini_batches):
            mbIndex = index[indexPerm[mb * self.mbsz:(mb + 1) * self.mbsz]]
            curInputsMB_CPU = data[:, mbIndex]
            curPastMB_CPU = [data[:, mbIndex - i - 1] for i in range(self.numPrev)]
            curInputsMB = cm.CUDAMatrix(reformat(curInputsMB_CPU))
            curPastMB = [cm.CUDAMatrix(reformat(p)) for p in curPastMB_CPU]
            if self.pastNoise > 0:
                for i in range(self.numPrev):
                    noise.fill_with_randn()
                    curPastMB[i].add_mult(noise, self.pastNoise)
            self.step(curInputsMB, curPastMB)
            recErr += self.curRecErr()
            if reportMB:
                yield (mb, num_mini_batches)
        yield recErr
def init_weight_storage(self):
    """
    Initialize storage for gradients and gradient steps and build a
    list of weight/gradient/energy gradient/step tuples.
    """
    for name in self.weightVariableNames():
        w = self.__dict__[name]
        if not isinstance(w, list):
            self.__dict__[name] = cm.CUDAMatrix(reformat(w))
            self.__dict__["d" + name] = cm.CUDAMatrix(reformat(0.0 * w))
        else:
            self.__dict__[name] = [cm.CUDAMatrix(reformat(x)) for x in w]
            self.__dict__["d" + name] = [cm.CUDAMatrix(reformat(0.0 * part)) for part in w]
def updateSignOfWeights(self):
    """
    We need the sign of the weights for L1 regularization. Since we
    work on the GPU it is convenient to just allocate storage for these
    things once and periodically update the sign variables when the
    weights they depend on have changed and we need to know the signs.
    """
    if self.signVisToFact is None:
        self.signVisToFact = cm.CUDAMatrix(reformat(num.zeros((self.numVis, self.numFact))))
    if self.signFactToHid is None:
        # probably not really needed since we constrain it to be negative
        self.signFactToHid = cm.CUDAMatrix(reformat(num.zeros((self.numFact, self.numHid))))
    self.visToFact.sign(target=self.signVisToFact)
    self.factToHid.sign(target=self.signFactToHid)
def loadWeights(self, wDict):
    """
    This code is terrible.
    """
    assert all(wName in wDict for wName in self.weightVariableNames())
    for w_name in wDict:
        if w_name in self.weightVariableNames():
            w = wDict[w_name]
            if isinstance(w, list) or w_name in ["A", "B"]:
                assert all(self.__dict__[w_name][i].numpy_array.shape == wDict[w_name][i].shape
                           for i in range(len(wDict[w_name])))
                self.__dict__[w_name] = [cm.CUDAMatrix(reformat(part)) for part in w]
            else:
                assert self.__dict__[w_name].numpy_array.shape == wDict[w_name].shape
                self.__dict__[w_name] = cm.CUDAMatrix(reformat(w))
def main1():
    net = BinaryCRBM(10, 16, 2)
    data = loadmat("brazilRainfall.mat")["batchdata"]
    chunks = [(data[i * 90 + 2:(i + 1) * 90, :],
               [data[i * 90 + 1:(i + 1) * 90 - 1, :],
                data[i * 90:(i + 1) * 90 - 2, :]]) for i in range(24)]
    data = num.vstack([c[0] for c in chunks])
    past = [num.vstack([c[1][i] for c in chunks]) for i in range(2)]
    data = data.transpose()
    past = [p.transpose() for p in past]
    print data.shape
    print data.shape[1] / 64
    for p in past:
        print p.shape
    net.learnRate = 0.002
    net.momentum = 0.9
    net.weightCost = 0
    for j, err in enumerate(net.trainXFerEnMasse(data, past, 100)):
        print j + 1, err
    ex = cm.CUDAMatrix(reformat(num.array([[1, 1], [2, 3]])))
    print ex.euclid_norm()
def main():
    batch_size = 128

    # load data; input in the format PxD (P vectorized samples with D dimensions)
    d = loadmat('patches_16x16x3.mat')
    totnumcases = d["dataraw"].shape[0]
    numBatches = totnumcases / batch_size
    d = d["dataraw"][0:int(totnumcases / batch_size) * batch_size, :].copy()
    totnumcases = d.shape[0]

    # preprocess input
    dd = loadmat("pca_projections.mat")
    d = num.dot(dd["transform"], d.T).copy()  # get the PCA projections
    data = cm.CUDAMatrix(reformat(d))

    net = CovGRBM(d.shape[0], 400, 400, mbsz=128, initWeightSigma=0.02)
    d = loadmat("topo2D_3x3_stride1_400filt.mat")
    net.setFactorHiddenMatrix(-d["w2"])
    net.hmcSteps = 20

    freshData = lambda: (data.slice(b * batch_size, (b + 1) * batch_size)
                         for b in range(numBatches))
    highestEp = -1
    for ep, mb in net.train(100, freshData, 10, True):
        if ep > highestEp:
            highestEp = ep
            print "Epoch %d" % (highestEp)
            print net.runningAvRej, net.hmcStepSize
            print "V2F:", net.visToFact.euclid_norm()
def updateSignOfWeights(self):
    """
    We need the sign of the weights for L1 regularization. Since we
    work on the GPU it is convenient to just allocate storage for these
    things once and periodically update the sign variables when the
    weights they depend on have changed and we need to know the signs.
    """
    if self.signVisToHid is None or self.signA is None or self.signB is None:
        self.signVisToHid = cm.CUDAMatrix(reformat(num.zeros((self.numVis, self.numHid))))
        self.signA = [cm.CUDAMatrix(reformat(num.zeros((self.numVis, self.numVis))))
                      for i in range(self.numPrev)]
        self.signB = [cm.CUDAMatrix(reformat(num.zeros((self.numVis, self.numHid))))
                      for i in range(self.numPrev)]
    self.visToHid.sign(target=self.signVisToHid)
    for i in range(self.numPrev):
        self.A[i].sign(target=self.signA[i])
        self.B[i].sign(target=self.signB[i])
def initTemporary(self):
    self.hActs = cm.CUDAMatrix(reformat(num.zeros((self.numHid, self.mbsz))))
    self.hActProbs = cm.CUDAMatrix(reformat(num.zeros((self.numHid, self.mbsz))))
    self.negVis = cm.CUDAMatrix(reformat(num.zeros((self.numVis, self.mbsz))))
    self.tempVisMB = cm.CUDAMatrix(reformat(num.zeros((self.numVis, self.mbsz))))
    self.dynamicHidBias = cm.CUDAMatrix(reformat(num.zeros((self.numHid, self.mbsz))))
    self.dynamicVisBias = cm.CUDAMatrix(reformat(num.zeros((self.numVis, self.mbsz))))
    self.sMask = num.zeros((self.numVis, self.mbsz))
    self.sMask[:self.smsz, :] = 1
    self.gaussMask = 1 - self.sMask
    self.onesCol = cm.CUDAMatrix(reformat(num.ones((self.numVis, 1))))
    self.sMask = cm.CUDAMatrix(reformat(self.sMask))
    self.gaussMask = cm.CUDAMatrix(reformat(self.gaussMask))
    self.tempRow = cm.CUDAMatrix(reformat(num.zeros((1, self.mbsz))))
def getFilteringDist(net, data, index, preSigmoid=False):
    """
    We use this name to correspond more closely to Graham's matlab
    code. This function sends the visible data stored in data through
    net to produce hidden unit activations for every valid position of
    net. The valid positions are given by index.
    """
    assert len(index.shape) == 1
    pred = []
    numcases = index.shape[0]
    num_mini_batches = numcases / net.mbsz
    excess = numcases - num_mini_batches * net.mbsz
    for mb in range(num_mini_batches):
        mbIdx = index[mb * net.mbsz:(mb + 1) * net.mbsz]
        net.vis = cm.CUDAMatrix(reformat(data[:, mbIdx]))
        net.past = [cm.CUDAMatrix(reformat(data[:, mbIdx - i - 1]))
                    for i in range(net.numPrev)]
        if preSigmoid:
            net.hidNetInpts()
        else:
            net.hidActProbs()
        net.hActProbs.copy_to_host()
        pred.append(net.hActProbs.numpy_array.copy())
    if excess > 0:
        batch = num.zeros(net.vis.shape)
        mbIdx = index[num_mini_batches * net.mbsz:]
        batch[:, :excess] = data[:, mbIdx]
        net.vis = cm.CUDAMatrix(reformat(batch))
        net.past = []
        for i in range(net.numPrev):
            batch[:, :excess] = data[:, mbIdx - i - 1]
            net.past.append(cm.CUDAMatrix(reformat(batch)))
        if preSigmoid:
            net.hidNetInpts()
        else:
            net.hidActProbs()
        net.hActProbs.copy_to_host()
        pred.append(net.hActProbs.numpy_array.copy()[:, :excess])
    return num.hstack(pred)
def step(self, data):
    if isinstance(data, cm.CUDAMatrix):
        self.inp = data
    else:
        self.inp = cm.CUDAMatrix(reformat(data))
    self.fprop()
    recErr = self.curRecErr()
    self.bprop()
    for j, wname in enumerate(self.weightVariableNames()):
        # NOTE THE UNUSUAL SIGN CONVENTION HERE
        self.__dict__[wname].subtract_mult(self.__dict__["d" + wname],
                                           self.learnRate / self.mbsz)
    return recErr
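# A hedged training-loop sketch around step() (assumes a constructed
# net exposing fprop/bprop and float32 minibatches of width net.mbsz):
#   for epoch in range(numEpochs):
#       err = sum(net.step(mb) for mb in minibatches)
#       print epoch, err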
def testMaskedSM():
    x = num.random.randn(16 + 21, 1024)
    k = 16
    x = x.astype("float32")
    r = x.copy()

    t = time.time()
    xGPU = cm.CUDAMatrix(reformat(x))
    r[:k, :] = cpuSoftmax(r[:k, :])
    print time.time() - t

    tempCol = cm.CUDAMatrix(reformat(num.ones((xGPU.shape[0], 1))))
    tempRow = cm.CUDAMatrix(reformat(num.zeros((1, xGPU.shape[1]))))
    tempMatrix = cm.CUDAMatrix(reformat(num.zeros(xGPU.shape)))
    sMask = num.zeros(xGPU.shape)
    sMask[:k, :] = 1
    notSMask = 1 - sMask
    sMask = cm.CUDAMatrix(reformat(sMask))
    notSMask = cm.CUDAMatrix(reformat(notSMask))

    t = time.time()
    maskedSingleSoftmax(xGPU, tempMatrix, sMask, notSMask, tempCol, tempRow)
    print time.time() - t

    xGPU.copy_to_host()
    diff = r - xGPU.numpy_array
    #print diff
    print num.sum(num.abs(diff))
def initTemporary(self):
    self.hActs = cm.CUDAMatrix(reformat(num.zeros((self.numHid, self.mbsz))))
    self.hActProbs = cm.CUDAMatrix(reformat(num.zeros((self.numHid, self.mbsz))))
    self.negVis = cm.CUDAMatrix(reformat(num.zeros((self.numVis, self.mbsz))))
    self.tempVisMB = cm.CUDAMatrix(reformat(num.zeros((self.numVis, self.mbsz))))
    self.dynamicHidBias = cm.CUDAMatrix(reformat(num.zeros((self.numHid, self.mbsz))))
    self.dynamicVisBias = cm.CUDAMatrix(reformat(num.zeros((self.numVis, self.mbsz))))
def rbmVtoH(m, X):
    """Convey data from the visible layer to the hidden layer."""
    cm.cublas_init()

    # copy data to GPU
    data = cm.CUDAMatrix(cm.reformat(X))
    weight = cm.CUDAMatrix(cm.reformat(m.weight))
    biasH = cm.CUDAMatrix(cm.reformat(m.biasH))

    nCase = X.shape[0]
    nHid = biasH.asarray().size
    hidActP = cm.CUDAMatrix(np.zeros((nCase, nHid)))

    if m.type == "BB":
        cm.dot(data, weight, target=hidActP)
        hidActP.add_row_vec(biasH)
        hidActP.apply_sigmoid()
    elif m.type == "BG":
        cm.dot(data, weight, target=hidActP)
        hidActP.add_row_vec(biasH)
    elif m.type == "GB":
        pass

    result = hidActP.asarray()

    # free device memory
    data.free_device_memory()
    weight.free_device_memory()
    biasH.free_device_memory()
    hidActP.free_device_memory()

    cm.shutdown()

    return result
def load_matrix(self, array):
    """
    For a cudamat array that already exists, copy over new data from a
    numpy.ndarray. Must be of the right size.
    """
    assert self.shape == array.shape
    array = reformat(array)
    self.numpy_array = array
    self.free_device_memory()
    _cudamat.init_from_array(self.p_mat,
                             array.ctypes.data_as(ct.POINTER(ct.c_float)),
                             ct.c_int(array.shape[0]),
                             ct.c_int(array.shape[1]))
    err_code = _cudamat.copy_to_device(self.p_mat)
    if err_code:
        raise generate_exception(err_code)
    self.T = cudamat.TransposedCUDAMatrix(self.mat)
    return self
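# Sketch: load_matrix lets one device buffer be reused for a stream of
# equally shaped host arrays instead of allocating a new CUDAMatrix per
# batch (hypothetical names):
#   buf = cudamat.CUDAMatrix(reformat(np.zeros((128, 64))))
#   for arr in host_batches:  # each arr is 128 x 64
#       buf.load_matrix(arr)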
def set_column_vectors(self, order, start, end, src):
    """
    Copies columns from src into self. The columns are copied in the
    order specified by indices=order[start .. end].
    DJ Nov 13. 2012.
    """
    if isinstance(order, cudamat.CUDAMatrix):
        orderMat = order
    else:
        orderMat = cudamat.CUDAMatrix(cudamat.reformat(order))
    err_code = _cudamat_ext.set_column_vectors(self.p_mat,
                                               orderMat.p_mat, src.p_mat,
                                               ct.c_int(start), ct.c_int(end))
    if err_code:
        raise generate_exception(err_code)
    return self
def get_column_vectors(self, order, start, end, target=None):
    """
    Copies columns from self into target. The columns are copied in the
    order specified by indices=order[start .. end].
    DJ March 1. 2010.
    """
    if target is None:
        target = empty((self.shape[0], end - start))
    if isinstance(order, cudamat.CUDAMatrix):
        orderMat = order
    else:
        orderMat = cudamat.CUDAMatrix(cudamat.reformat(order))
    err_code = _cudamat_ext.get_column_vectors(self.p_mat,
                                               orderMat.p_mat, target.p_mat,
                                               ct.c_int(start), ct.c_int(end))
    if err_code:
        raise generate_exception(err_code)
    return target
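# get_column_vectors is the gather half of a pair with
# set_column_vectors above: one reads columns of self in index order,
# the other writes them back. A sketch (hypothetical index vector):
#   order = np.array([[3, 1, 2, 0]])
#   cols = mat.get_column_vectors(order, 0, 4)   # gather
#   mat.set_column_vectors(order, 0, 4, cols)    # scatter back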
def copy_subsequences(self, order, start, end, seqLength, target):
    """
    Copies subsequences starting at the indices specified by
    indices=order[start .. end], each of length seqLength.
    """
    if not target:
        raise CUDAMatException("target not specified. target cannot be null")
    if isinstance(order, CUDAMatrix):
        orderMat = order
    else:
        orderMat = cudamat.CUDAMatrix(cudamat.reformat(order))
    err_code = _cudamat_ext.copy_subsequences(self.p_mat,
                                              orderMat.p_mat, target.p_mat,
                                              ct.c_int(start), ct.c_int(end),
                                              ct.c_int(seqLength))
    if err_code:
        raise generate_exception(err_code)
    return self
def loadWeights(self, wDict):
    for w_name in self.weightVariableNames():
        assert w_name in wDict
        w = wDict[w_name]
        assert self.__dict__[w_name].numpy_array.shape == wDict[w_name].shape
        self.__dict__[w_name] = cm.CUDAMatrix(reformat(w))
def rbm(data, numHid, modelType = "BB", **kwargs) : """ rbm defination data : when type is BB, should be binary, or in [0,1] to be interpreted as probabilities when type is GB, should be continuous real value. data should have a format of *.npy numHid : number nodes of and hidden layer type : rbm type, can be set as BB or GB additional inputs (specified as name value pairs or in struct) method CD or SML eta learning rate momentum momentum for smoothness amd to prevent overfitting NOTE: momentum is not recommended with SML maxepoch # of epochs: each is a full pass through train data avglast how many epochs before maxepoch to start averaging before. Procedure suggested for faster convergence by Kevin Swersky in his MSc thesis penalty weight decay factor batchsize The number of training instances per batch verbose For printing progress anneal Flag. If set true, the penalty is annealed linearly through epochs to 10% of its original value OUTPUTS: model.type Type of RBM (i.e. type of its visible and hidden units) model.weight The weights of the connections model.biasH The biases of the hidden layer model.biasV The biases of the visible layer model.top The activity of the top layer, to be used when training DBN's errors The errors in reconstruction at every epoch """ arg = util.processOptions(kwargs, \ method = "CD", \ eta = 0.1, \ momentum = 0.9,\ maxEpoch = 50, \ avgLast = 0, \ penalty = 0, \ batchSize = 50, \ verbose = True, \ anneal = False) [method, eta, momentum, maxEpoch, avgLast, penalty, batchSize, verbose, anneal] = [\ arg["method"],\ arg["eta"],\ arg["momentum"],\ arg["maxEpoch"],\ arg["avgLast"],\ arg["penalty"],\ arg["batchSize"],\ arg["verbose"],\ arg["anneal"] ] # from which step, we start to compute the average avgStart = maxEpoch - avgLast # for weight decay use oldPenalty = penalty # numCases : number of example # numDims : the length of each example # each row is an example [numCases, numDims] = list(data.shape) if verbose : print "processing data" numVis = numDims numBatch = util.ceil(numCases,batchSize) # shuffle the data np.random.shuffle(data) # init CUDA # cm.cuda_set_device() cm.cublas_init() cm.CUDAMatrix.init_random(100) deviceData = cm.CUDAMatrix(cm.reformat(data)) # init weights weight = cm.CUDAMatrix(0.1*np.random.randn(numVis,numHid)) biasV = cm.CUDAMatrix(np.zeros((1, numVis))) biasH = cm.CUDAMatrix(np.zeros((1, numHid))) # init weight update weightInc = cm.CUDAMatrix(np.zeros((numVis,numHid))) biasVInc = cm.CUDAMatrix(np.zeros((1,numVis))) biasHInc = cm.CUDAMatrix(np.zeros((1,numHid))) #init temporary storage visActP = cm.empty((batchSize, numVis)) hidActP = cm.empty((batchSize, numHid)) hidActP2 = cm.empty((batchSize, numHid)) visState = cm.empty((batchSize,numVis)) hidState = cm.empty((batchSize, numHid)) t = 1 for epoch in range(maxEpoch) : error = [] if anneal : # apply linear weight decay penalty = oldPenalty - 0.9 *epoch/maxEpoch*oldPenalty for batch in range(numBatch) : # train each data batch if batchSize*(batch+1) > numCases : visTrue = deviceData.get_row_slice(batchSize*batch, numCases) batchSize = visTrue.shape[0] else : visTrue = deviceData.get_row_slice(batchSize*batch, batchSize*(batch+1)) batchSize = visTrue.shape[0] visActP.assign(visTrue) # positive phase cm.dot(visActP, weight, target = hidActP) hidActP.add_row_vec(biasH) hidActP.apply_sigmoid() hidState.fill_with_rand() hidState.less_than(hidActP, target=hidState) if cmp(method, "SML") == 0 : if np.logical_and(np.equal(epoch,1), np.equal(batch,1)) : pass # here does not need in practical use 
elif cmp(method, "CD") == 0 : pass # negetive phase if cmp(modelType, "BB") == 0 : cm.dot(hidState, weight.transpose(), target = visActP) visActP.add_row_vec(biasV) visActP.apply_sigmoid() visState.fill_with_rand() visState.less_than(visActP, target = visState) elif cmp(modelType, "GB") == 0 : cm.dot(hidState, weight.transpose(), target = visActP) visActP.add_row_vec(biasV) visActP.add(np.random.randn(batchSize, numVis),target=visState) # another positive phase cm.dot(visState, weight, target = hidActP2) hidActP2.add_row_vec(biasH) hidActP2.apply_sigmoid() hidState.fill_with_rand() hidState.less_than(hidActP2, target=hidState) #update weight and bias dWeight = cm.dot(visTrue.transpose(), hidActP) dWeight.subtract_dot(visState.transpose(), hidActP2) dBiasV = visTrue.sum(axis = 0).subtract(visState.sum(axis = 0)) dBiasH = hidActP.sum(axis=0).subtract(hidActP2.sum(axis = 0)) dWeight.divide(batchSize).subtract(weight.mult(penalty)) dBiasV.divide(batchSize) dBiasH.divide(batchSize) weightInc.mult(momentum).add_mult(dWeight, eta) biasVInc.mult(momentum).add_mult(dBiasV, eta) biasHInc.mult(momentum).add_mult(dBiasH, eta) weight.add(weightInc) biasV.add(biasVInc) biasH.add(biasHInc) if epoch > avgStart : # apply average weightAgv.subtract(weightAgv.subtract(weight).mult(1.0/t)) biasVAgv.subtract(biasVAgv.subtract(biasV).mult(1.0/t)) biasHAgv.subtract(biasHAgv.subtract(biasH).mult(1.0/t)) t = t+1 else : weightAgv = weight biasVAgv = biasV biasHAgv = biasH # reconstruction error visTrue.subtract(visActP) error.append(visTrue.euclid_norm() ** 2) if verbose : print "epoch %d/%d. Reconstruction error is %f " % (epoch+1, maxEpoch, sum(error)) # save rbm model top = cm.CUDAMatrix(np.zeros((numCases, numHid))) cm.dot(deviceData, weightAgv, target = top) top.add_row_vec(biasHAgv) top.apply_sigmoid() model_ = m.rbmModel(weightAgv,biasVAgv,biasHAgv,type = modelType,top = top) cm.shutdown() return model_
def test():
    # Finite-difference check of the analytic CD gradients: for each
    # perturbed weight, (E(w + delta) - E(w - delta)) / (2 * delta)
    # should match the derivative computed by CDStats.
    num.random.seed(10)
    m = 1
    net = CovGRBM(8, 4, 16, mbsz=m, initWeightSigma=0.5)
    vis = cm.CUDAMatrix(reformat(2 * num.random.randn(8, m)))
    #hid = cm.CUDAMatrix(reformat(2*num.random.rand(16,16)))
    net.vis = vis
    net.hidActProbs()
    delta = 0.00001
    print net.energy(vis, net.hActProbs)

    # get derivs we compute with update rules
    net.CDStats(net.vis, net.hActProbs, True)
    net.dvisToFact.copy_to_host()
    dvisToFact = net.dvisToFact.numpy_array.copy()
    net.dfactToHid.copy_to_host()
    dfactToHid = net.dfactToHid.numpy_array.copy()
    net.dhidBias.copy_to_host()
    dhidBias = net.dhidBias.numpy_array.copy()

    print net.energyCPU(vis, net.hActProbs)

    net.visToFact.copy_to_host()
    vToF = net.visToFact.numpy_array.copy()
    vToFA = vToF.copy()
    vToFB = vToF.copy()
    vToFB[2, 3] += delta
    vToFA[2, 3] -= delta
    net.visToFact = cm.CUDAMatrix(reformat(vToFB))
    EB = net.energy(vis, net.hActProbs)
    net.visToFact = cm.CUDAMatrix(reformat(vToFA))
    EA = net.energy(vis, net.hActProbs)
    deriv = (EB - EA) / (2 * delta)
    print "deriv:", dvisToFact[2, 3]
    print "finite differences:", deriv
    net.visToFact = cm.CUDAMatrix(reformat(vToF))
    print
    print net.dEdP(vis, net.hActProbs, 3, 2)

    fToH = net.factToHid.numpy_array.copy()
    fToHA = fToH.copy()
    fToHB = fToH.copy()
    fToHB[2, 3] += delta
    fToHA[2, 3] -= delta
    net.factToHid = cm.CUDAMatrix(reformat(fToHB))
    EB = net.energy(vis, net.hActProbs)
    net.factToHid = cm.CUDAMatrix(reformat(fToHA))
    EA = net.energy(vis, net.hActProbs)
    deriv = (EB - EA) / (2 * delta)
    print "deriv:", dfactToHid[2, 3]
    print "finite differences:", deriv
    net.factToHid = cm.CUDAMatrix(reformat(fToH))

    bias = net.hidBias.numpy_array.copy()
    biasA = bias.copy()
    biasB = bias.copy()
    biasB[2, 0] += delta
    biasA[2, 0] -= delta
    net.hidBias = cm.CUDAMatrix(reformat(biasB))
    EB = net.energy(vis, net.hActProbs)
    net.hidBias = cm.CUDAMatrix(reformat(biasA))
    EA = net.energy(vis, net.hActProbs)
    deriv = (EB - EA) / (2 * delta)
    print "deriv:", dhidBias[2, 0]
    print "finite differences:", deriv
    net.hidBias = cm.CUDAMatrix(reformat(bias))
dlog(' pack loaded from train: (%s)' % ', '.join(map(lambda i: str(i), dat_train.shape)))
batch_packs_train = dat_train.shape[0]

# shuffle data
np.random.shuffle(dat_train)
dat_train = dat_train.T

dlog(' Go through dat_train')
for batch_pack_inx in range(batch_packs_train):
    dlog(' batch_pack_inx = %i' % batch_pack_inx)
    dat_tmp = dat_train[:, (batch_pack_inx * batch_size * batches_in_free_mem):
                           ((batch_pack_inx + 1) * batch_size * batches_in_free_mem)]
    if dat_tmp.shape[1] == 0:
        break
    try:
        dev_dat_train = cm.CUDAMatrix(cm.reformat(dat_tmp))
    except Exception as e:
        print 'CUDAMAT ERROR: ' + e.message
        cm.cublas_shutdown()
        exit(0)
    dlog(' dev_dat_train.shape = [%s]' % ', '.join(map(lambda x: str(x), dev_dat_train.shape)))
    num_batches_train = dev_dat_train.shape[1] / batch_size
    for batch in range(num_batches_train):
        # sample dropout
        if options.drop_out is not None:
            do_h.fill_with_rand()
            do_h.less_than(options.drop_out)
def rbm(X, numHid, **kwargs):
    """
    rbm definition
    data : when type is BB, should be binary, or in [0,1] to be
           interpreted as probabilities; when type is GB, should be
           continuous real values. data should have a format of *.npy
    numHid : number of nodes in the hidden layer
    type : rbm type, can be set as BB or GB

    method     CD or SML
    eta        learning rate
    momentum   momentum for smoothness and to prevent overfitting
               NOTE: momentum is not recommended with SML
    maxepoch   # of epochs: each is a full pass through train data
    avglast    how many epochs before maxepoch to start averaging.
               Procedure suggested for faster convergence by Kevin
               Swersky in his MSc thesis
    batchsize  the number of training instances per batch
    verbose    for printing progress

    model.type    type of RBM (i.e. type of its visible and hidden units)
    model.weight  the weights of the connections
    model.biasH   the biases of the hidden layer
    model.biasV   the biases of the visible layer
    model.top     the activity of the top layer, to be used when training DBN's
    errors        the errors in reconstruction at every epoch
    """
    # When computing the transpose of a matrix, the *.transpose() method
    # consumes extra space; the .T attribute is preferable.
    arg = util.processOptions(kwargs,
                              modelType="BB",
                              method="CD",
                              eta=0.1,
                              momentum=0.5,
                              maxEpoch=500,
                              avgLast=0,
                              penalty=0,
                              batchSize=100,
                              verbose=True)
    [modelType, method, eta, momentum, maxEpoch, avgLast, penalty, batchSize, verbose] = [
        arg["modelType"],
        arg["method"],
        arg["eta"],
        arg["momentum"],
        arg["maxEpoch"],
        arg["avgLast"],
        arg["penalty"],
        arg["batchSize"],
        arg["verbose"]]

    # from which step we start to compute the average
    # avgStart = maxEpoch - avgLast

    # for weight decay use
    # oldPenalty = penalty

    # numCases : number of examples
    # numDims : the length of each example; each row is an example
    [numCases, numDims] = list(X.shape)

    if verbose:
        print "processing data"

    numVis = numDims
    numBatch = util.ceil(numCases, batchSize)

    # shuffle the data
    data = copy.deepcopy(X)
    np.random.shuffle(data)

    # init CUDA
    # cm.cuda_set_device()
    cm.cublas_init()
    cm.CUDAMatrix.init_random(100)
    deviceData = cm.CUDAMatrix(cm.reformat(data))

    # init weights
    weight = cm.CUDAMatrix(0.1 * np.random.randn(numVis, numHid))
    biasV = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasH = cm.CUDAMatrix(np.zeros((1, numHid)))

    # init weight update
    weightInc = cm.CUDAMatrix(np.zeros((numVis, numHid)))
    biasVInc = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasHInc = cm.CUDAMatrix(np.zeros((1, numHid)))

    # init temporary storage
    visActP = cm.empty((batchSize, numVis))
    hidActP = cm.empty((batchSize, numHid))
    hidState = cm.empty((batchSize, numHid))

    for epoch in range(maxEpoch):
        error = []

        for batch in range(numBatch):
            # train each data batch
            if batchSize * (batch + 1) > numCases:
                visTrue = deviceData.get_row_slice(batchSize * batch, numCases)
                batchSize = visTrue.shape[0]
                visActP = cm.empty((batchSize, numVis))
                hidActP = cm.empty((batchSize, numHid))
                hidState = cm.empty((batchSize, numHid))
            else:
                visTrue = deviceData.get_row_slice(batchSize * batch,
                                                   batchSize * (batch + 1))
                batchSize = visTrue.shape[0]
            visActP.assign(visTrue)

            # apply momentum
            weightInc.mult(momentum)
            biasVInc.mult(momentum)
            biasHInc.mult(momentum)

            # positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.add_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0)
            biasHInc.add_sums(hidActP, axis=0)

            hidState.fill_with_rand()
            hidState.less_than(hidActP, target=hidActP)

            if cmp(method, "SML") == 0:
                if np.logical_and(np.equal(epoch, 1), np.equal(batch, 1)):
                    pass  # not needed in practical use
            elif cmp(method, "CD") == 0:
                pass

            # negative phase
            if cmp(modelType, "BB") == 0:
                cm.dot(hidActP, weight.T, target=visActP)
                visActP.add_row_vec(biasV)
                visActP.apply_sigmoid()
            elif cmp(modelType, "GB") == 0:
                cm.dot(hidActP, weight.T, target=visActP)
                visActP.add_row_vec(biasV)
                # add unit Gaussian noise; cudamat's add takes a matrix or a
                # scalar, not a raw ndarray, so wrap the noise first
                visActP.add(cm.CUDAMatrix(cm.reformat(np.random.randn(batchSize, numVis))),
                            target=visActP)

            # another positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.subtract_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0, mult=-1)
            biasHInc.add_sums(hidActP, axis=0, mult=-1)

            # update weight and bias
            weight.add_mult(weightInc, eta / batchSize)
            biasV.add_mult(biasVInc, eta / batchSize)
            biasH.add_mult(biasHInc, eta / batchSize)

            # if epoch > avgStart :
            #     # apply average
            #     weightAgv.subtract(weightAgv.subtract(weight).mult(1.0/t))
            #     biasVAgv.subtract(biasVAgv.subtract(biasV).mult(1.0/t))
            #     biasHAgv.subtract(biasHAgv.subtract(biasH).mult(1.0/t))
            #     t = t+1
            # else :
            #     weightAgv = weight
            #     biasVAgv = biasV
            #     biasHAgv = biasH

            # reconstruction error
            visTrue.subtract(visActP)
            error.append(visTrue.euclid_norm() ** 2)

            # free device memory
            visTrue.free_device_memory()

        if verbose:
            print "epoch %d/%d. Reconstruction error is %f " % (epoch + 1, maxEpoch, sum(error))

    # save rbm model
    top = cm.CUDAMatrix(np.zeros((numCases, numHid)))
    cm.dot(cm.CUDAMatrix(cm.reformat(X)), weight, target=top)
    top.add_row_vec(biasH)
    top.apply_sigmoid()

    weight.copy_to_host()
    biasV.copy_to_host()
    biasH.copy_to_host()
    top.copy_to_host()

    model_ = m.rbmModel(weight.numpy_array, biasV.numpy_array,
                        biasH.numpy_array, type=modelType, top=top.numpy_array)

    # free device memory
    deviceData.free_device_memory()
    weight.free_device_memory()
    biasV.free_device_memory()
    biasH.free_device_memory()
    weightInc.free_device_memory()
    biasVInc.free_device_memory()
    biasHInc.free_device_memory()
    hidActP.free_device_memory()
    visActP.free_device_memory()
    hidState.free_device_memory()

    cm.shutdown()

    return model_
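# A hedged usage sketch for the rbm() trainer above (hypothetical data;
# rows are examples, as the docstring requires):
#   X = np.random.rand(1000, 784).astype("float32")
#   model = rbm(X, 500, modelType="BB", eta=0.1, maxEpoch=50, batchSize=100)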
import time

import numpy as np
import cudamat as cm

import util

# initialize CUDA
cm.cublas_init()
cm.CUDAMatrix.init_random(1)

# load data
util.load('mnist.dat', globals())
dev_dat = cm.CUDAMatrix(cm.reformat(dat/255.))

# training parameters
epsilon = 0.1
momentum = 0.9
num_epochs = 30
batch_size = 128
num_batches = dat.shape[1]/batch_size

# model parameters
num_vis = dat.shape[0]
num_hid = 4096

# initialize weights
w_vh = cm.CUDAMatrix(0.1 * np.random.randn(num_vis, num_hid))
w_v = cm.CUDAMatrix(np.zeros((num_vis, 1)))
w_h = cm.CUDAMatrix(-4.*np.ones((num_hid, 1)))

# initialize weight updates
import time

import numpy as np
import cudamat as cm

import util

# initialize CUDA
cm.cublas_init()
cm.CUDAMatrix.init_random(1)

# load data
util.load('mnist.dat', globals())
dev_dat = cm.CUDAMatrix(cm.reformat(dat/255.))

# training parameters
epsilon = 0.1
momentum = 0.9
num_epochs = 30
batch_size = 128
num_batches = dat.shape[1]/batch_size

# model parameters
num_vis = dat.shape[0]
num_hid = 4096

# initialize weights
w_vh = cm.CUDAMatrix(cm.reformat(0.1 * np.random.randn(num_vis, num_hid)))
w_v = cm.CUDAMatrix(cm.reformat(np.zeros((num_vis, 1))))
w_h = cm.CUDAMatrix(cm.reformat(-4.*np.ones((num_hid, 1))))

# initialize weight updates
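# Note on the cm.reformat wrapping above: reformat returns the array as
# float32 in FORTRAN order, the layout cudamat stores on the device, so
# this variant of the script does not rely on the CUDAMatrix
# constructor to convert dtype and memory order itself.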
def rbmFit(X, numHid, y, isSaveModel=False, name=None, **kwargs):
    """
    X      ... data. should be binary, or in [0,1] interpreted as
           ... probabilities
    numhid ... number of hidden units
    y      ... list of discrete labels

    nClass     number of classes
    method     CD or SML
    eta        learning rate
    momentum   momentum for smoothness and to prevent overfitting
               NOTE: momentum is not recommended with SML
    maxepoch   # of epochs: each is a full pass through train data
    avglast    how many epochs before maxepoch to start averaging.
               Procedure suggested for faster convergence by Kevin
               Swersky in his MSc thesis
    batchsize  the number of training instances per batch
    verbose    for printing progress

    model.weight      the weights of the connections
    model.biasH       the biases of the hidden layer
    model.biasV       the biases of the visible layer
    model.weightlabel ... the weights on the labels layer
    model.biasLabel   ... the biases on the labels layer
    errors            the errors in reconstruction at each epoch
    """
    arg = util.processOptions(kwargs,
                              nClass=np.unique(y).size,
                              method="CD",
                              eta=0.1,
                              momentum=0.5,
                              maxEpoch=500,
                              avgLast=0,
                              penalty=0,
                              batchSize=100,
                              verbose=True)
    [nClass, method, eta, momentum, maxEpoch, avgLast, penalty, batchSize, verbose] = [
        arg["nClass"],
        arg["method"],
        arg["eta"],
        arg["momentum"],
        arg["maxEpoch"],
        arg["avgLast"],
        arg["penalty"],
        arg["batchSize"],
        arg["verbose"]]

    if verbose:
        print "Processing data ..."

    # from which step we start to compute the average
    # avgStart = maxEpoch - avgLast

    # for weight decay use
    # oldPenalty = penalty

    # numCases : number of examples
    # numDims : the length of each example; each row is an example
    [numCases, numDims] = list(X.shape)

    numVis = numDims
    uniqueLabel = np.unique(y)
    numBatch = util.ceil(numCases, batchSize)

    y = util.matrixLabel(y)

    # shuffle data and label
    data = copy.deepcopy(X)
    [data, label] = util.shuffle(data, y)

    # init CUDA
    cm.cublas_init()
    cm.CUDAMatrix.init_random(100)
    deviceData = cm.CUDAMatrix(cm.reformat(data))
    deviceLabel = cm.CUDAMatrix(cm.reformat(label))

    # init weights
    weight = cm.CUDAMatrix(0.1 * np.random.randn(numVis, numHid))
    biasV = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasH = cm.CUDAMatrix(np.zeros((1, numHid)))
    weightLabel = cm.CUDAMatrix(0.1 * np.random.randn(nClass, numHid))
    biasLabel = cm.CUDAMatrix(np.zeros((1, nClass)))

    # init weight update
    weightInc = cm.CUDAMatrix(np.zeros((numVis, numHid)))
    biasVInc = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasHInc = cm.CUDAMatrix(np.zeros((1, numHid)))
    weightLabelInc = cm.CUDAMatrix(np.zeros((nClass, numHid)))
    biasLabelInc = cm.CUDAMatrix(np.zeros((1, nClass)))

    # init temporary storage
    visActP = cm.empty((batchSize, numVis))
    hidActP = cm.empty((batchSize, numHid))
    hidState = cm.empty((batchSize, numHid))

    for epoch in range(maxEpoch):
        error = []

        for batch in range(numBatch):
            # train each data batch
            if batchSize * (batch + 1) > numCases:
                visTrue = deviceData.get_row_slice(batchSize * batch, numCases)
                labelTrue = deviceLabel.get_row_slice(batchSize * batch, numCases)
                batchSize = visTrue.shape[0]
                visActP = cm.empty((batchSize, numVis))
                hidActP = cm.empty((batchSize, numHid))
                hidState = cm.empty((batchSize, numHid))
            else:
                visTrue = deviceData.get_row_slice(batchSize * batch,
                                                   batchSize * (batch + 1))
                labelTrue = deviceLabel.get_row_slice(batchSize * batch,
                                                      batchSize * (batch + 1))
                batchSize = visTrue.shape[0]
            visActP.assign(visTrue)

            # apply momentum to the increments (the original multiplied
            # weightLabel and biasLabel themselves, which shrinks the label
            # weights every batch instead of decaying their updates)
            weightInc.mult(momentum)
            biasVInc.mult(momentum)
            biasHInc.mult(momentum)
            weightLabelInc.mult(momentum)
            biasLabelInc.mult(momentum)

            # positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_dot(labelTrue, weightLabel)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.add_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0)
            biasHInc.add_sums(hidActP, axis=0)
            weightLabelInc.add_dot(labelTrue.T, hidActP)
            biasLabelInc.add_sums(labelTrue, axis=0)

            hidState.fill_with_rand()
            hidState.less_than(hidActP, target=hidActP)

            if cmp(method, "SML") == 0:
                if np.logical_and(np.equal(epoch, 1), np.equal(batch, 1)):
                    pass  # not needed in practical use
            elif cmp(method, "CD") == 0:
                pass

            # negative phase
            cm.dot(hidActP, weight.T, target=visActP)
            visActP.add_row_vec(biasV)
            visActP.apply_sigmoid()

            cm.dot(hidActP, weightLabel.T, target=labelTrue)
            labelTrue.add_row_vec(biasLabel)
            labelTrue = util.softmax(labelTrue)

            # another positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_dot(labelTrue, weightLabel)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.subtract_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0, mult=-1)
            biasHInc.add_sums(hidActP, axis=0, mult=-1)
            weightLabelInc.subtract_dot(labelTrue.T, hidActP)
            biasLabelInc.add_sums(labelTrue, axis=0, mult=-1)

            # update weights and bias
            weight.add_mult(weightInc, eta / batchSize)
            biasV.add_mult(biasVInc, eta / batchSize)
            biasH.add_mult(biasHInc, eta / batchSize)
            weightLabel.add_mult(weightLabelInc, eta / batchSize)
            biasLabel.add_mult(biasLabelInc, eta / batchSize)

            # calculate reconstruction error
            visTrue.subtract(visActP)
            error.append(visTrue.euclid_norm() ** 2)

            # free memory
            visTrue.free_device_memory()
            labelTrue.free_device_memory()

        if verbose:
            print "Epoch %d/%d, reconstruction error is %f " % (epoch + 1, maxEpoch, sum(error))

    # save rbm model
    weight.copy_to_host()
    biasV.copy_to_host()
    biasH.copy_to_host()
    weightLabel.copy_to_host()
    biasLabel.copy_to_host()

    model_ = m.rbmModel(weight.numpy_array, biasV.numpy_array, biasH.numpy_array,
                        weightLabel=weightLabel.numpy_array,
                        biasLabel=biasLabel.numpy_array, labels=uniqueLabel)

    # free device memory
    deviceData.free_device_memory()
    deviceLabel.free_device_memory()
    weight.free_device_memory()
    biasV.free_device_memory()
    biasH.free_device_memory()
    weightLabel.free_device_memory()
    biasLabel.free_device_memory()
    weightInc.free_device_memory()
    biasVInc.free_device_memory()
    biasHInc.free_device_memory()
    weightLabelInc.free_device_memory()
    biasLabelInc.free_device_memory()
    hidActP.free_device_memory()
    visActP.free_device_memory()
    hidState.free_device_memory()

    cm.shutdown()

    if isSaveModel:
        modelList = []
        modelList.append(model_)
        model = np.array(modelList)
        np.save(name, model)

    return model_
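# A hedged usage sketch for rbmFit() (hypothetical data and labels):
#   X = np.random.rand(1000, 784).astype("float32")
#   y = np.random.randint(0, 10, 1000)
#   model = rbmFit(X, 500, y, maxEpoch=50, batchSize=100)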
import time

import numpy as np
import cudamat as cm

import util

# initialize CUDA
cm.cublas_init()
cm.CUDAMatrix.init_random(1)

# load data
#util.load('mnist.dat', globals())
#dev_dat = cm.CUDAMatrix(cm.reformat(dat/255.))
dev_dat = cm.CUDAMatrix(cm.reformat(np.random.rand(1024, 6000)))

# training parameters
epsilon = 0.1
momentum = 0.9
num_epochs = 1
batch_size = 1
num_batches = 6000/batch_size

# model parameters
num_vis = 1024
num_hid = 1024

# initialize weights
w_vh = cm.CUDAMatrix(0.1 * np.random.randn(num_vis, num_hid))
w_v = cm.CUDAMatrix(np.zeros((num_vis, 1)))
w_h = cm.CUDAMatrix(-4.*np.ones((num_hid, 1)))
def rbmPredict(m, X):
    """Use a trained rbm model to do prediction."""
    nClass = m.labels.size
    numCase = X.shape[0]

    # A CPU version of the free-energy computation, kept for reference:
    # FF = np.zeros((numCase, nClass))
    # FFcol = np.zeros((numCase, 1))
    # for index in range(nClass) :
    #     temp = np.zeros((numCase, nClass))
    #     temp[:, index] = 1
    #     tt = np.emath.log(np.exp(np.dot(X, m.weight) + np.dot(temp, m.weightLabel) + m.biasH) + 1)
    #     FFcol = temp[:,index] * m.biasLabel[0,index] + np.sum(tt, axis=1)
    #     FF[:, index] = FFcol
    # [x, y] = np.where(np.abs(FF - np.max(FF, axis=1, keepdims=True)) < 1e-5)
    # result = np.zeros(y.shape)
    # for index in range(y.size) :
    #     result[index] = m.labels[y[index]]

    # The following part runs on the GPU
    cm.cublas_init()

    # copy data to GPU
    data = cm.CUDAMatrix(cm.reformat(X))
    weight = cm.CUDAMatrix(cm.reformat(m.weight))
    biasH = cm.CUDAMatrix(cm.reformat(m.biasH))
    weightLabel = cm.CUDAMatrix(cm.reformat(m.weightLabel))
    biasLabel = cm.CUDAMatrix(cm.reformat(m.biasLabel))

    F = cm.CUDAMatrix(np.zeros((numCase, nClass)))
    Fcol = cm.CUDAMatrix(np.zeros((numCase, 1)))
    temp = cm.CUDAMatrix(np.zeros((numCase, nClass)))
    tt = cm.CUDAMatrix(np.zeros((numCase, biasH.asarray().size)))

    for index in range(nClass):
        temp.assign(0)
        temp.set_col_slice(index, index + 1, 1)

        tt = cm.dot(data, weight)
        tt.add_dot(temp, weightLabel)
        tt.add_row_vec(biasH)
        cm.log_1_plus_exp(tt, target=tt, exact=True)

        Fcol = cm.sum(tt, axis=1)
        Fcol.add_mult(temp.get_col_slice(index, index + 1),
                      biasLabel.numpy_array[0, index])
        F.set_col_slice(index, index + 1, Fcol)
        tt.free_device_memory()

    F.copy_to_host()
    [x, y] = np.where(np.abs(F.numpy_array -
                             np.max(F.numpy_array, axis=1, keepdims=True)) < 1e-5)

    # free device memory
    data.free_device_memory()
    weight.free_device_memory()
    biasH.free_device_memory()
    biasLabel.free_device_memory()
    weightLabel.free_device_memory()
    F.free_device_memory()
    Fcol.free_device_memory()
    temp.free_device_memory()

    cm.shutdown()

    result = np.zeros(y.shape)
    for index in range(y.size):
        result[index] = m.labels[y[index]]

    return [result, F.numpy_array]
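# Completing the pipeline, a prediction sketch on held-out data
# (hypothetical names; model comes from rbmFit above):
#   [labels, freeEnergies] = rbmPredict(model, Xtest)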
def init_weight_storage(self):
    for name in self.weightVariableNames():
        w = self.__dict__[name]
        self.__dict__[name] = cm.CUDAMatrix(reformat(w))
        self.__dict__["d" + name] = cm.CUDAMatrix(reformat(0.0 * w))
def initTemporary(self):
    # allocate per-minibatch scratch buffers on the GPU
    self.hid = cm.CUDAMatrix(reformat(num.zeros((self.numHid, self.mbsz))))
    self.out = cm.CUDAMatrix(reformat(num.zeros((self.numVis, self.mbsz))))
    self.delta = cm.CUDAMatrix(reformat(num.zeros((self.numHid, self.mbsz))))
    self.tempVisMB = cm.CUDAMatrix(reformat(num.zeros((self.numVis, self.mbsz))))
import time

import numpy as np
import cudamat as cm

import util

# initialize CUDA
cm.cublas_init()
cm.CUDAMatrix.init_random(1)

# load data
util.load("mnist.dat", globals())
dev_dat = cm.CUDAMatrix(cm.reformat(dat / 255.0))

# training parameters
epsilon = 0.1
momentum = 0.9
num_epochs = 3
batch_size = 128
num_batches = dat.shape[1] / batch_size

# model parameters
num_vis = dat.shape[0]
num_hid = 4096

# initialize weights
w_vh = cm.CUDAMatrix(0.1 * np.random.randn(num_vis, num_hid))
w_v = cm.CUDAMatrix(np.zeros((num_vis, 1)))
w_h = cm.CUDAMatrix(-4.0 * np.ones((num_hid, 1)))

# initialize weight updates
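# The script above breaks off at "initialize weight updates". A plausible
# continuation, mirroring the momentum-smoothed increment buffers used by
# rbm() earlier in this document (the wu_* names are an assumption, not
# from the original source):
wu_vh = cm.CUDAMatrix(np.zeros((num_vis, num_hid)))  # weight-update buffer
wu_v = cm.CUDAMatrix(np.zeros((num_vis, 1)))         # visible-bias update
wu_h = cm.CUDAMatrix(np.zeros((num_hid, 1)))         # hidden-bias update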
def rbmFit(X, numHid, y, isSaveModel=False, name=None, **kwargs):
    """
    X           data. should be binary, or in [0,1] to be interpreted
                as probabilities
    numHid      number of hidden units
    y           list of discrete labels

    nClass      number of classes
    method      CD or SML
    eta         learning rate
    momentum    momentum for smoothness and to prevent overfitting
                NOTE: momentum is not recommended with SML
    maxepoch    # of epochs: each is a full pass through train data
    avglast     how many epochs before maxepoch to start averaging.
                Procedure suggested for faster convergence by Kevin Swersky
                in his MSc thesis
    batchsize   the number of training instances per batch
    verbose     for printing progress

    model.weight        the weights of the connections
    model.biasH         the biases of the hidden layer
    model.biasV         the biases of the visible layer
    model.weightlabel   the weights on the label layer
    model.biasLabel     the biases on the label layer
    errors              the reconstruction error at each epoch
    """
    arg = util.processOptions(kwargs,
                              nClass=np.unique(y).size,
                              method="CD",
                              eta=0.1,
                              momentum=0.5,
                              maxEpoch=500,
                              avgLast=0,
                              penalty=0,
                              batchSize=100,
                              verbose=True)
    [nClass, method, eta, momentum, maxEpoch, avgLast, penalty,
     batchSize, verbose] = [
        arg["nClass"],
        arg["method"],
        arg["eta"],
        arg["momentum"],
        arg["maxEpoch"],
        arg["avgLast"],
        arg["penalty"],
        arg["batchSize"],
        arg["verbose"]]

    if verbose:
        print "Processing data ..."

    # from which epoch we start to compute the average
    # avgStart = maxEpoch - avgLast

    # for weight decay use
    # oldPenalty = penalty

    # numCases : number of examples
    # numDims  : the length of each example
    # each row is an example
    [numCases, numDims] = list(X.shape)

    numVis = numDims
    uniqueLabel = np.unique(y)
    numBatch = util.ceil(numCases, batchSize)

    y = util.matrixLabel(y)

    # shuffle data and labels together
    data = copy.deepcopy(X)
    [data, label] = util.shuffle(data, y)

    # init CUDA
    cm.cublas_init()
    cm.CUDAMatrix.init_random(100)
    deviceData = cm.CUDAMatrix(cm.reformat(data))
    deviceLabel = cm.CUDAMatrix(cm.reformat(label))

    # init weights
    weight = cm.CUDAMatrix(0.1 * np.random.randn(numVis, numHid))
    biasV = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasH = cm.CUDAMatrix(np.zeros((1, numHid)))
    weightLabel = cm.CUDAMatrix(0.1 * np.random.randn(nClass, numHid))
    biasLabel = cm.CUDAMatrix(np.zeros((1, nClass)))

    # init weight updates
    weightInc = cm.CUDAMatrix(np.zeros((numVis, numHid)))
    biasVInc = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasHInc = cm.CUDAMatrix(np.zeros((1, numHid)))
    weightLabelInc = cm.CUDAMatrix(np.zeros((nClass, numHid)))
    biasLabelInc = cm.CUDAMatrix(np.zeros((1, nClass)))

    # init temporary storage
    visActP = cm.empty((batchSize, numVis))
    hidActP = cm.empty((batchSize, numHid))
    hidState = cm.empty((batchSize, numHid))

    for epoch in range(maxEpoch):
        error = []

        for batch in range(numBatch):
            # train on each data batch; the last batch may be smaller
            if batchSize * (batch + 1) > numCases:
                visTrue = deviceData.get_row_slice(batchSize * batch, numCases)
                labelTrue = deviceLabel.get_row_slice(batchSize * batch, numCases)
                batchSize = visTrue.shape[0]
                visActP = cm.empty((batchSize, numVis))
                hidActP = cm.empty((batchSize, numHid))
                hidState = cm.empty((batchSize, numHid))
            else:
                visTrue = deviceData.get_row_slice(batchSize * batch,
                                                   batchSize * (batch + 1))
                labelTrue = deviceLabel.get_row_slice(batchSize * batch,
                                                      batchSize * (batch + 1))
                batchSize = visTrue.shape[0]

            visActP.assign(visTrue)

            # apply momentum to the increments
            weightInc.mult(momentum)
            biasVInc.mult(momentum)
            biasHInc.mult(momentum)
            weightLabelInc.mult(momentum)
            biasLabelInc.mult(momentum)

            # positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_dot(labelTrue, weightLabel)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.add_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0)
            biasHInc.add_sums(hidActP, axis=0)
            weightLabelInc.add_dot(labelTrue.T, hidActP)
            biasLabelInc.add_sums(labelTrue, axis=0)

            # sample the hidden states
            hidState.fill_with_rand()
            hidState.less_than(hidActP, target=hidActP)

            if method == "SML":
                if epoch == 1 and batch == 1:
                    pass  # not needed in practical use
            elif method == "CD":
                pass

            # negative phase
            cm.dot(hidActP, weight.T, target=visActP)
            visActP.add_row_vec(biasV)
            visActP.apply_sigmoid()

            cm.dot(hidActP, weightLabel.T, target=labelTrue)
            labelTrue.add_row_vec(biasLabel)
            labelTrue = util.softmax(labelTrue)

            # another positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_dot(labelTrue, weightLabel)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.subtract_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0, mult=-1)
            biasHInc.add_sums(hidActP, axis=0, mult=-1)
            weightLabelInc.subtract_dot(labelTrue.T, hidActP)
            biasLabelInc.add_sums(labelTrue, axis=0, mult=-1)

            # update weights and biases
            weight.add_mult(weightInc, eta / batchSize)
            biasV.add_mult(biasVInc, eta / batchSize)
            biasH.add_mult(biasHInc, eta / batchSize)
            weightLabel.add_mult(weightLabelInc, eta / batchSize)
            biasLabel.add_mult(biasLabelInc, eta / batchSize)

            # calculate reconstruction error
            visTrue.subtract(visActP)
            error.append(visTrue.euclid_norm() ** 2)

            # free device memory
            visTrue.free_device_memory()
            labelTrue.free_device_memory()

        if verbose:
            print "Epoch %d/%d, reconstruction error is %f " % (epoch + 1, maxEpoch, sum(error))

    # save rbm model
    weight.copy_to_host()
    biasV.copy_to_host()
    biasH.copy_to_host()
    weightLabel.copy_to_host()
    biasLabel.copy_to_host()

    model_ = m.rbmModel(weight.numpy_array, biasV.numpy_array, biasH.numpy_array,
                        weightLabel=weightLabel.numpy_array,
                        biasLabel=biasLabel.numpy_array, labels=uniqueLabel)

    # free device memory
    deviceData.free_device_memory()
    deviceLabel.free_device_memory()
    weight.free_device_memory()
    biasV.free_device_memory()
    biasH.free_device_memory()
    weightLabel.free_device_memory()
    biasLabel.free_device_memory()
    weightInc.free_device_memory()
    biasVInc.free_device_memory()
    biasHInc.free_device_memory()
    weightLabelInc.free_device_memory()
    biasLabelInc.free_device_memory()
    hidActP.free_device_memory()
    visActP.free_device_memory()
    hidState.free_device_memory()

    cm.shutdown()

    if isSaveModel:
        modelList = []
        modelList.append(model_)
        model = np.array(modelList)
        np.save(name, model)

    return model_
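# Example usage of rbmFit() above -- a minimal sketch with synthetic binary
# data and random labels; all names and values below are illustrative, not
# from the original source.
import numpy as np

X = (np.random.rand(500, 100) > 0.5).astype('float32')  # binary data
y = np.random.randint(0, 10, 500)                       # ten classes
model = rbmFit(X, 64, y, method="CD", eta=0.1,
               maxEpoch=5, batchSize=50, verbose=True)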
# shuffle data
np.random.shuffle(dat_train)
dat_train = dat_train.T

dlog('  Go through dat_train')
for batch_pack_inx in range(batch_packs_train):
    dlog('    batch_pack_inx = %i' % batch_pack_inx)
    dat_tmp = dat_train[:, (batch_pack_inx * batch_size * batches_in_free_mem):
                           ((batch_pack_inx + 1) * batch_size * batches_in_free_mem)]
    if dat_tmp.shape[1] == 0:
        break
    try:
        dev_dat_train = cm.CUDAMatrix(cm.reformat(dat_tmp))
    except Exception as e:
        print 'CUDAMAT ERROR: ' + e.message
        cm.cublas_shutdown()
        exit(0)
    dlog('    dev_dat_train.shape = [%s]' % ', '.join(map(str, dev_dat_train.shape)))
    num_batches_train = dev_dat_train.shape[1] / batch_size

    for batch in range(num_batches_train):
        # sample dropout
        if options.drop_out is not None:
            do_h.fill_with_rand()
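# The dropout branch above is truncated after sampling uniform noise into
# do_h. A hedged sketch of the usual next step (assumed, not from the
# original): threshold the noise against the drop probability so each
# hidden unit survives with probability 1 - drop_out, then multiply the
# mask into the hidden activations. The literal sizes below are
# illustrative; only do_h and options.drop_out appear in the original.
drop_out = 0.5
hid_demo = cm.CUDAMatrix(np.random.rand(4096, 128))  # stand-in activations
mask = cm.empty(hid_demo.shape)
mask.fill_with_rand()
mask.greater_than(drop_out)  # binary keep-mask: 1 with prob. 1 - drop_out
hid_demo.mult(mask)          # dropped hidden units are zeroed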
def scaleRows(mat_in):
    """Scale each row of mat_in (in place) by its maximum, so every row with
    a positive maximum ends up in [0, 1]; other rows are left unchanged."""
    mat = mat_in
    for i in range(mat.shape[0]):
        if max(mat[i]) > 0:
            mat[i] = mat[i] / max(mat[i])
    return mat

# note: the original specified dtype='float8', which numpy does not support;
# float64 is assumed here
dat = np.genfromtxt('/Users/danielcarlin/Data/GTEx/first_200.tab',
                    names=True, dtype='float64')

# initialize CUDA
cm.cublas_init()
cm.CUDAMatrix.init_random(1)

# load data
dev_dat = cm.CUDAMatrix(cm.reformat(scaleRows(dat)))

# training parameters
epsilon = 0.1
momentum = 0.9
num_epochs = 30
batch_size = 128
num_batches = dat.shape[1] // batch_size

# model parameters
num_vis = dat.shape[0]
num_hid = 4096

# initialize weights
w_vh = cm.CUDAMatrix(0.1 * np.random.randn(num_vis, num_hid))
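# Quick sanity check for scaleRows (a minimal sketch, not from the original
# file): every row with a positive maximum is rescaled so its max becomes 1,
# and the all-zero row is left untouched.
toy = np.array([[1.0, 2.0, 4.0],
                [0.0, 0.0, 0.0],
                [3.0, 6.0, 9.0]])
print scaleRows(toy)
# approximately:
# [[ 0.25        0.5         1.        ]
#  [ 0.          0.          0.        ]
#  [ 0.33333333  0.66666667  1.        ]]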
import time

import numpy as np
import cudamat as cm

import util

# initialize CUDA
cm.cublas_init()
cm.CUDAMatrix.init_random(1)

# load data
# util.load('mnist.dat', globals())
# dev_dat = cm.CUDAMatrix(cm.reformat(dat/255.))
dev_dat = cm.CUDAMatrix(cm.reformat(np.random.rand(1024, 6000)))

# training parameters
epsilon = 0.1
momentum = 0.9
num_epochs = 1
batch_size = 1
num_batches = 6000 / batch_size

# model parameters
num_vis = 1024
num_hid = 1024

# initialize weights
w_vh = cm.CUDAMatrix(0.1 * np.random.randn(num_vis, num_hid))
w_v = cm.CUDAMatrix(np.zeros((num_vis, 1)))
w_h = cm.CUDAMatrix(-4. * np.ones((num_hid, 1)))