def columnNorms(mat, tempMat, result):
    assert mat.shape == tempMat.shape
    assert result.shape == (1, mat.shape[1])
    #cm.pow(mat, 2, target = tempMat)
    mat.mult(mat, target=tempMat)
    tempMat.sum(axis=0, target=result)
    cm.sqrt(result)
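# A quick usage sketch for columnNorms (not from the original source): it
# assumes cudamat has been initialized with cm.cublas_init(), and checks the
# column norms against NumPy on the host.
import numpy as np
import cudamat as cm

cm.cublas_init()
a = np.random.rand(64, 32).astype(np.float32)
mat = cm.CUDAMatrix(a)
tempMat = cm.empty(mat.shape)
result = cm.empty((1, mat.shape[1]))
columnNorms(mat, tempMat, result)
assert np.allclose(result.asarray(), np.linalg.norm(a, axis=0)[np.newaxis, :], atol=1e-4)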
def acceleration(self):
    # this sets self.hActProbs and self.normalizedVisMB and self.sqColLens
    self.hidActProbs(vis=self.negVis)
    cm.dot(self.factToHid, self.hActProbs, target=self.tempFactMB)
    self.tempFactMB.mult(-1)
    self.tempFactMB.mult(self.factResponses)
    cm.dot(self.visToFact, self.tempFactMB, target=self.normalizedAccel)

    # rename some things to be like Marc'Aurelio's code:
    normcoeff = self.tempRow2
    lengthsq = self.tempRow

    # these next few lines repeat some work, but it is too confusing to
    # cache all this stuff at the moment
    self.sqColLens.mult(1.0 / self.numVis, target=lengthsq)
    lengthsq.add(small)  # self.tempRow is what Marc'Aurelio calls lengthsq
    cm.sqrt(lengthsq, target=normcoeff)
    normcoeff.mult(lengthsq)  # now self.tempRow2 has what Marc'Aurelio calls normcoeff
    normcoeff.reciprocal()

    self.normalizedAccel.mult(self.negVis, target=self.tempVisMB)
    self.tempVisMB.sum(axis=0, target=self.tempRow3)  # this tempRow stuff is getting absurd
    self.tempRow3.mult(-1.0 / self.numVis)
    self.negVis.mult_by_row(self.tempRow3, target=self.tempVisMB)
    self.normalizedAccel.mult_by_row(lengthsq, target=self.accel)
    self.accel.add(self.tempVisMB)
    self.accel.mult_by_row(normcoeff)

    # quadratic in v term contribution to gradient
    self.accel.add(self.negVis)
    # all parts before this point have a 2 show up because of differentiation
    self.accel.mult(2)

    # vis bias contribution
    self.accel.add_col_mult(self.visBias, -1)
def normalizeInputData(vis, tempVis, sqColLens, normalizer, normalizedVis):
    """
    Our input is vis and our outputs are sqColLens, normalizer, and
    normalizedVis. We clobber tempVis.
    """
    numVis, mbsz = vis.shape
    assert sqColLens.shape == (1, mbsz)
    assert sqColLens.shape == normalizer.shape
    assert tempVis.shape == vis.shape == normalizedVis.shape
    vis.mult(vis, target=tempVis)
    tempVis.sum(axis=0, target=sqColLens)
    sqColLens.mult(1.0 / numVis, target=normalizer)
    normalizer.add(small)
    cm.sqrt(normalizer)
    normalizer.reciprocal()
    vis.mult_by_row(normalizer, target=normalizedVis)
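# A usage sketch for normalizeInputData (not part of the original source). It
# assumes the module-level constant `small` that the function reads, and
# verifies the column normalization v / sqrt(mean(v^2) + small) against NumPy.
import numpy as np
import cudamat as cm

cm.cublas_init()
small = 0.5  # assumed module-level constant

v = np.random.randn(100, 16).astype(np.float32)
vis = cm.CUDAMatrix(v)
tempVis = cm.empty(vis.shape)
sqColLens = cm.empty((1, 16))
normalizer = cm.empty((1, 16))
normalizedVis = cm.empty(vis.shape)
normalizeInputData(vis, tempVis, sqColLens, normalizer, normalizedVis)

expected = v / np.sqrt(np.sum(v * v, axis=0) / v.shape[0] + small)
assert np.allclose(normalizedVis.asarray(), expected, atol=1e-4)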
def test_sqrt():
    m = 256
    n = 128
    a = np.array(np.random.rand(m, n) * 20, dtype=np.float32, order='F')
    b = np.array(np.random.rand(m, n), dtype=np.float32, order='F')

    c = np.sqrt(a)

    m1 = cm.CUDAMatrix(a)
    m2 = cm.CUDAMatrix(b)
    cm.sqrt(m1, target=m2)
    cm.sqrt(m1)

    m1.copy_to_host()
    m2.copy_to_host()

    assert np.max(np.abs(c - m1.numpy_array)) < 10**-4, "Error in cudamat.sqrt exceeded threshold"
    assert np.max(np.abs(c - m2.numpy_array)) < 10**-4, "Error in cudamat.sqrt exceeded threshold"
def compute_energy_mcRBM_visual(self, data, normdata, energy, VF, FH, bias_cov,
                                bias_vis, w_mean, bias_mean, t1, t2, t6, feat,
                                featsq, feat_mean, length, lengthsq, normcoeff,
                                small, num_vis):
    # normalize input data vectors
    data.mult(data, target=t6)         # DxP (nr input dims x nr samples)
    t6.sum(axis=0, target=lengthsq)    # 1xP
    lengthsq.mult(0.5, target=energy)  # energy of quadratic regularization term
    lengthsq.mult(1. / num_vis)        # normalize by number of components (like std)
    lengthsq.add(small)                # small prevents division by 0
    cmt.sqrt(lengthsq, target=length)
    length.reciprocal(target=normcoeff)           # 1xP
    data.mult_by_row(normcoeff, target=normdata)  # normalized data

    ## potential
    # covariance contribution
    cmt.dot(VF.T, normdata, target=feat)  # HxP (nr factors x nr samples)
    feat.mult(feat, target=featsq)        # HxP
    cmt.dot(FH.T, featsq, target=t1)      # OxP (nr cov hiddens x nr samples)
    t1.mult(-0.5)
    t1.add_col_vec(bias_cov)  # OxP
    cmt.exp(t1)               # OxP
    t1.add(1, target=t2)      # OxP
    cmt.log(t2)
    t2.mult(-1)
    energy.add_sums(t2, axis=0)

    # mean contribution
    cmt.dot(w_mean.T, data, target=feat_mean)  # HxP (nr mean hiddens x nr samples)
    feat_mean.add_col_vec(bias_mean)           # HxP
    cmt.exp(feat_mean)
    feat_mean.add(1)
    cmt.log(feat_mean)
    feat_mean.mult(-1)
    energy.add_sums(feat_mean, axis=0)

    # visible bias term
    data.mult_by_col(bias_vis, target=t6)
    t6.mult(-1)                  # DxP
    energy.add_sums(t6, axis=0)  # 1xP

    # kinetic
    data.mult(data, target=t6)
    energy.add_sums(t6, axis=0, mult=.5)
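# A plain-NumPy sketch of the same energy (an illustration, not part of the
# original source), useful for spot-checking a few columns of the GPU result.
# All arguments are host arrays with the shapes noted above (bias_cov is Ox1,
# bias_vis is Dx1, bias_mean is Hx1); the return value has shape (P,).
def mcrbm_energy_numpy(data, VF, FH, bias_cov, bias_vis, w_mean, bias_mean, small, num_vis):
    lengthsq = (data ** 2).sum(axis=0)
    energy = 0.5 * lengthsq                                # quadratic regularization term
    normdata = data / np.sqrt(lengthsq / num_vis + small)  # same normalization as above
    t1 = -0.5 * np.dot(FH.T, np.dot(VF.T, normdata) ** 2) + bias_cov
    energy -= np.log(1 + np.exp(t1)).sum(axis=0)           # covariance hiddens
    fm = np.dot(w_mean.T, data) + bias_mean
    energy -= np.log(1 + np.exp(fm)).sum(axis=0)           # mean hiddens
    energy -= (data * bias_vis).sum(axis=0)                # visible bias term
    energy += 0.5 * (data ** 2).sum(axis=0)                # kinetic term
    return energy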
def run(self, iterations):
    for i in range(0, iterations):
        # F = XG(G.T G)^-1
        cm.dot(self.G_gpu.T, self.G_gpu, target=self.GTG_gpu)
        try:
            self.GTGpinv_gpu = cm.CUDAMatrix(np.linalg.inv(self.GTG_gpu.asarray()))
        except LinAlgError:
            self.GTGpinv_gpu = cm.CUDAMatrix(np.linalg.pinv(self.GTG_gpu.asarray()))
        cm.dot(self.X_gpu, self.G_gpu, target=self.XG_gpu)
        cm.dot(self.XG_gpu, self.GTGpinv_gpu, target=self.F_gpu)

        # preparation and calculation of the matrix separations
        cm.dot(self.X_gpu.T, self.F_gpu, target=self.XTF_gpu)
        cm.dot(self.F_gpu.T, self.F_gpu, target=self.FTF_gpu)
        self.XTF_gpu.greater_than(0, target=self.XTFgreater_gpu)
        self.XTF_gpu.mult(self.XTFgreater_gpu, target=self.XTFpos_gpu)
        self.XTFpos_gpu.subtract(self.XTF_gpu, target=self.XTFneg_gpu)
        self.FTF_gpu.greater_than(0, target=self.FTFgreater_gpu)
        self.FTF_gpu.mult(self.FTFgreater_gpu, target=self.FTFpos_gpu)
        self.FTFpos_gpu.subtract(self.FTF_gpu, target=self.FTFneg_gpu)

        # compute the G update
        cm.dot(self.G_gpu, self.FTFpos_gpu, target=self.GFTFpos_gpu)
        cm.dot(self.G_gpu, self.FTFneg_gpu, target=self.GFTFneg_gpu)
        self.XTFpos_gpu.add(self.GFTFneg_gpu)
        self.XTFneg_gpu.add(self.GFTFpos_gpu)
        self.XTFpos_gpu.add_scalar(10**-9)
        self.XTFneg_gpu.add_scalar(10**-9)
        self.XTFpos_gpu.divide(self.XTFneg_gpu)
        cm.sqrt(self.XTFpos_gpu)
        self.G_gpu.mult(self.XTFpos_gpu)

        # test for convergence
        if (i % self.niter_test_conv == 0) and self.checkConvergence():
            print "NMF converged after %i iterations" % i
            break
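# For reference, a CPU sketch of the multiplicative G update performed above
# (the semi-NMF rule of Ding, Li & Jordan); not from the original source.
# pos/neg split a matrix into its positive and negative parts, and eps plays
# the role of the 10**-9 guard.
import numpy as np

def seminmf_g_update(X, F, G, eps=1e-9):
    pos = lambda M: (np.abs(M) + M) / 2
    neg = lambda M: (np.abs(M) - M) / 2
    XTF = X.T.dot(F)
    FTF = F.T.dot(F)
    num = pos(XTF) + G.dot(neg(FTF)) + eps
    den = neg(XTF) + G.dot(pos(FTF)) + eps
    return G * np.sqrt(num / den)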
def pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False):
    """
    Compute the pairwise euclidean distance between matrices a and b.

    Parameters
    ----------
    a : np.ndarray (n, f)
        first matrix
    b : np.ndarray (m, f)
        second matrix
    returnAsGPU : boolean, optional (default False)
        if True, returns a cudamat matrix still on the GPU, else an np.ndarray
    squared : boolean, optional (default False)
        if True, return the squared euclidean distance matrix

    Returns
    -------
    c : (n x m) np.ndarray or cudamat.CUDAMatrix
        pairwise euclidean distance matrix
    """
    # a has shape (n, f) and b has shape (m, f); return a matrix c of shape
    # (n, m). First compute the squared euclidean distance in c_GPU, then
    # return its square root. In each cell [i, j] of c we want
    # sum{k in range(f)} ( (a[i,k] - b[j,k])^2 ). Since (a-b)^2 = a^2 - 2ab + b^2,
    # each cell of c should hold
    # sum{k in range(f)} ( a[i,k]^2 - 2 a[i,k] b[j,k] + b[j,k]^2 ).
    a_GPU = cudamat.CUDAMatrix(a)
    b_GPU = cudamat.CUDAMatrix(b)

    # Multiply a by b transpose to obtain in each cell [i, j] of c the value
    # sum{k in range(f)} ( a[i,k] b[j,k] )
    c_GPU = cudamat.dot(a_GPU, b_GPU.transpose())
    # multiply by -2 to have sum{k in range(f)} ( -2 a[i,k] b[j,k] )
    c_GPU.mult(-2)

    # Compute the vectors of the sums of squared elements.
    a_GPU = cudamat.pow(a_GPU, 2).sum(axis=1)
    b_GPU = cudamat.pow(b_GPU, 2).sum(axis=1)

    # Add the vectors to each column (respectively each row) of c.
    # sum{k in range(f)} ( a[i,k]^2 - 2 a[i,k] b[j,k] )
    c_GPU.add_col_vec(a_GPU)
    # sum{k in range(f)} ( a[i,k]^2 - 2 a[i,k] b[j,k] + b[j,k]^2 )
    c_GPU.add_row_vec(b_GPU.transpose())

    if not squared:
        c_GPU = cudamat.sqrt(c_GPU)

    if returnAsGPU:
        return c_GPU
    else:
        return c_GPU.asarray()
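# A usage sketch for pairwiseEuclideanGPU (not from the original source),
# checked against scipy's cdist; it assumes cudamat has been initialized.
import numpy as np
import cudamat
from scipy.spatial.distance import cdist

cudamat.cublas_init()
a = np.random.rand(50, 8).astype(np.float32)
b = np.random.rand(30, 8).astype(np.float32)
d_gpu = pairwiseEuclideanGPU(a, b)  # (50, 30) np.ndarray
assert np.allclose(d_gpu, cdist(a, b), atol=1e-3)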
def compute_gradient_mcRBM(self, data, normdata, VF, FH, bias_cov, bias_vis,
                           w_mean, bias_mean, t1, t2, t3, t4, t6, feat, featsq,
                           feat_mean, gradient, normgradient, length, lengthsq,
                           normcoeff, small, num_vis):
    # normalize input data
    data.mult(data, target=t6)       # DxP
    t6.sum(axis=0, target=lengthsq)  # 1xP
    lengthsq.mult(1. / num_vis)      # normalize by number of components (like std)
    lengthsq.add(small)
    cmt.sqrt(lengthsq, target=length)
    length.reciprocal(target=normcoeff)           # 1xP
    data.mult_by_row(normcoeff, target=normdata)  # normalized data

    cmt.dot(VF.T, normdata, target=feat)  # HxP
    feat.mult(feat, target=featsq)        # HxP
    cmt.dot(FH.T, featsq, target=t1)      # OxP
    t1.mult(-.5)
    t1.add_col_vec(bias_cov)              # OxP
    t1.apply_sigmoid(target=t2)           # OxP
    cmt.dot(FH, t2, target=t3)            # HxP
    t3.mult(feat)
    cmt.dot(VF, t3, target=normgradient)  # VxP

    # final bprop through normalization
    length.mult(lengthsq, target=normcoeff)
    normcoeff.reciprocal()                    # 1xP
    normgradient.mult(data, target=gradient)  # VxP
    gradient.sum(axis=0, target=t4)           # 1xP
    t4.mult(-1. / num_vis)
    data.mult_by_row(t4, target=gradient)
    normgradient.mult_by_row(lengthsq, target=t6)
    gradient.add(t6)
    gradient.mult_by_row(normcoeff)

    # add quadratic term gradient
    gradient.add(data)
    # add visible bias term
    gradient.add_col_mult(bias_vis, -1)

    # add MEAN contribution to gradient
    cmt.dot(w_mean.T, data, target=feat_mean)  # HxP
    feat_mean.add_col_vec(bias_mean)           # HxP
    feat_mean.apply_sigmoid()                  # HxP
    gradient.subtract_dot(w_mean, feat_mean)   # VxP
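# The "final bprop through normalization" block above is the chain rule
# through x = v / sqrt(mean(v^2) + small). A CPU sketch of just that step
# (an illustration, not part of the original source): given g = dE/dx, it
# returns dE/dv, matching gradient = (g*s - v*sum(g*v)/num_vis) / s^(3/2)
# with s = mean(v^2) + small, which is what the code calls lengthsq.
import numpy as np

def bprop_through_norm(g, v, small, num_vis):
    s = (v * v).sum(axis=0) / num_vis + small
    inner = (g * v).sum(axis=0) / num_vis
    return (g * s - v * inner) / s ** 1.5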
def run(self, iterations):
    for i in range(0, iterations):
        cm.dot(self.XTXneg_gpu, self.W_gpu, target=self.XTXnegW_gpu)
        cm.dot(self.XTXpos_gpu, self.W_gpu, target=self.XTXposW_gpu)

        # Update G
        cm.dot(self.G_gpu, self.W_gpu.T, target=self.GWT_gpu)
        # G *= np.sqrt((XTXposW + np.dot(GWT, XTXnegW))
        #              / (XTXnegW + np.dot(GWT, XTXposW)))
        cm.dot(self.GWT_gpu, self.XTXnegW_gpu, target=self.update1_gpu)
        cm.dot(self.GWT_gpu, self.XTXposW_gpu, target=self.update2_gpu)
        self.update1_gpu.add(self.XTXposW_gpu)
        self.update2_gpu.add(self.XTXnegW_gpu)
        self.update2_gpu.add_scalar(10**-9)
        self.update1_gpu.divide(self.update2_gpu)
        cm.sqrt(self.update1_gpu)
        self.G_gpu.mult(self.update1_gpu)

        # Update W
        cm.dot(self.G_gpu.T, self.G_gpu, target=self.GTG_gpu)
        # W *= np.sqrt((np.dot(XTXpos, G) + np.dot(XTXnegW, GTG))
        #              / (np.dot(XTXneg, G) + np.dot(XTXposW, GTG)))
        cm.dot(self.XTXpos_gpu, self.G_gpu, target=self.XTXposG_gpu)
        cm.dot(self.XTXneg_gpu, self.G_gpu, target=self.XTXnegG_gpu)
        cm.dot(self.XTXnegW_gpu, self.GTG_gpu, target=self.update1_gpu)
        cm.dot(self.XTXposW_gpu, self.GTG_gpu, target=self.update2_gpu)
        self.update1_gpu.add(self.XTXposG_gpu)
        self.update2_gpu.add(self.XTXnegG_gpu)
        self.update2_gpu.add_scalar(10**-9)
        self.update1_gpu.divide(self.update2_gpu)
        cm.sqrt(self.update1_gpu)
        self.W_gpu.mult(self.update1_gpu)

        # test for convergence
        if (i % self.niter_test_conv == 0) and self.checkConvergence():
            print "NMF converged after %i iterations" % i
            break
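# A CPU sketch of one iteration of the convex-NMF updates above (after Ding,
# Li & Jordan); not from the original source. pos/neg split XTX into its
# positive and negative parts, and eps mirrors the 10**-9 denominator guard.
import numpy as np

def convexnmf_step(XTX, G, W, eps=1e-9):
    pos = lambda M: (np.abs(M) + M) / 2
    neg = lambda M: (np.abs(M) - M) / 2
    XTXposW, XTXnegW = pos(XTX).dot(W), neg(XTX).dot(W)
    GWT = G.dot(W.T)
    G = G * np.sqrt((XTXposW + GWT.dot(XTXnegW)) /
                    (XTXnegW + GWT.dot(XTXposW) + eps))
    GTG = G.T.dot(G)
    W = W * np.sqrt((pos(XTX).dot(G) + XTXnegW.dot(GTG)) /
                    (neg(XTX).dot(G) + XTXposW.dot(GTG) + eps))
    return G, W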
def normolize(feat):
    # L2-normalize a 4096-dimensional feature vector on the GPU
    #feat_temp = np.vstack((feat, feat))
    feat = np.reshape(feat, (4096, 1))
    a = cm.CUDAMatrix(feat)
    c = cm.dot(a.T, a)
    c = cm.sqrt(c)
    c = c.asarray()
    feat = feat / c[0]
    '''
    for index, item in enumerate(feat):
        feat[index, :] = item / (c[index][index])
    '''
    return feat
def update(self, lr):
    if self.use_momentum:
        self.weights_update.mult(self.momentum)
        self.weights_update.subtract_mult(self.weights_grad, lr)
        self.weights.add(self.weights_update)
        if self.use_bias:
            self.biases_update.mult(self.momentum)
            self.biases_update.subtract_mult(self.biases_grad, lr)
            self.biases.add(self.biases_update)
    elif self.use_rmsprop:
        # cache = dr * cache + (1 - dr) * grad^2
        # (the exponent 2 was missing in the original cm.pow calls)
        self.weights_rmsprop_cache.mult(self.rmsprop_dr)
        cm.pow(self.weights_grad, 2, self.weights_grad_square)
        self.weights_grad_square.mult(1.0 - self.rmsprop_dr)
        self.weights_rmsprop_cache.add(self.weights_grad_square)
        # w -= lr * grad / sqrt(cache + 1e-8); take the sqrt into the scratch
        # buffer so the running cache itself is not clobbered
        self.weights_rmsprop_cache.add(1e-8)
        cm.sqrt(self.weights_rmsprop_cache, self.weights_grad_square)
        self.weights_grad.mult(lr).divide(self.weights_grad_square)
        self.weights.subtract(self.weights_grad)

        self.biases_rmsprop_cache.mult(self.rmsprop_dr)
        cm.pow(self.biases_grad, 2, self.biases_grad_square)
        self.biases_grad_square.mult(1.0 - self.rmsprop_dr)
        self.biases_rmsprop_cache.add(self.biases_grad_square)
        self.biases_rmsprop_cache.add(1e-8)
        cm.sqrt(self.biases_rmsprop_cache, self.biases_grad_square)
        self.biases_grad.mult(lr).divide(self.biases_grad_square)
        self.biases.subtract(self.biases_grad)
    else:
        self.weights.subtract_mult(self.weights_grad, lr)
        if self.use_bias:
            self.biases.subtract_mult(self.biases_grad, lr)

    # Max-norm regularization.
    if self.use_max_norm:
        cm.pow(self.weights, 2, self.weights_square)
        self.weights_square.sum(0, self.weights_factor)
        cm.sqrt(self.weights_factor, self.weights_factor)
        # Avoid zero weight magnitudes.
        self.weights_factor.add(1e-8)
        self.weights_factor.reciprocal().mult(self.max_norm_c)
        # Keep only factors less than 1.0 (columns whose norm exceeds max_norm_c).
        self.weights_factor.less_than(1.0, self.weights_factor_mask)
        self.weights_factor.mult(self.weights_factor_mask)
        # Change the zeroed entries to 1.0 so in-bound columns are unchanged.
        self.weights_factor_mask.less_than(1.0)
        self.weights_factor.add(self.weights_factor_mask)
        # Scale down the over-sized weight columns.
        self.weights.mult_by_row(self.weights_factor)
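# The RMSProp branch above implements the standard rule
# cache = dr*cache + (1-dr)*grad^2; w -= lr*grad/sqrt(cache + 1e-8).
# A minimal NumPy sketch of the same step (illustration only):
import numpy as np

def rmsprop_step(w, grad, cache, lr, decay_rate):
    cache = decay_rate * cache + (1.0 - decay_rate) * grad ** 2
    w = w - lr * grad / np.sqrt(cache + 1e-8)
    return w, cache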
def optimization(M_u_u, M_u_f, M_t_f, L_u, L_t, S_u_u, S_u_u_D, S_t_t, S_t_t_D,
                 alpha, beta, k, loss, num_step):
    m = M_u_u.shape[0]
    w = M_u_f.shape[1]
    n = M_t_f.shape[0]

    # random samples from a uniform distribution over [0, 1)
    U = np.random.rand(m, k)
    # int8() would reduce the precision of the float numbers, don't do it
    # U = np.int8(U)
    V = np.random.rand(n, k)
    W = np.random.rand(w, k)
    H1 = np.random.rand(k, k)
    H2 = np.random.rand(k, k)
    H3 = np.random.rand(k, k)

    M_u_u = cm.CUDAMatrix(M_u_u)
    #print(M_t_f)
    #print(np.sum(np.sum(M_t_f)))
    M_u_f = cm.CUDAMatrix(M_u_f)
    M_t_f = cm.CUDAMatrix(M_t_f)
    U = cm.CUDAMatrix(U)
    V = cm.CUDAMatrix(V)
    W = cm.CUDAMatrix(W)
    H1 = cm.CUDAMatrix(H1)
    H2 = cm.CUDAMatrix(H2)
    H3 = cm.CUDAMatrix(H3)
    L_u = cm.CUDAMatrix(L_u)
    L_t = cm.CUDAMatrix(L_t)
    S_u_u = cm.CUDAMatrix(S_u_u)
    S_u_u_D = cm.CUDAMatrix(S_u_u_D)
    S_t_t = cm.CUDAMatrix(S_t_t)
    S_t_t_D = cm.CUDAMatrix(S_t_t_D)

    pvalue = 0.00000000000001
    step = 0
    maxU = U.asarray()
    maxPurity = per.dealWith(maxU)

    while step < num_step:
        # M_t_f is ok now (it didn't change along the process)
        # M_t_f_n = M_t_f.asarray()
        # print(np.sum(np.sum(M_t_f_n)))
        # debug prints of the current factors:
        # print(M_u_u.asarray()); print(M_u_f.asarray())
        # print(U.asarray()); print(V.asarray()); print(W.asarray())
        # print(H1.asarray()); print(H2.asarray()); print(H3.asarray())
        t = targetFunction(M_u_u, M_u_f, M_t_f, L_u, L_t, U, V, W, H1, H2, H3,
                           alpha, beta).asarray()
        print('loss: ' + str(t[0][0]))
        if t <= loss:
            break
        # print(S_u_u.asarray())
        # print(cm.dot(S_u_u, U).asarray())
        # print(L_u.asarray())
        # print(manyDot([U.transpose(), L_u, U]).asarray())
        # print(cm.dot(S_u_u_D, U).asarray())

        # update U
        up = (manyDot([M_u_u, U, H1.transpose()])
              .add(manyDot([M_u_f, W, H3.transpose()]))
              .add(cm.dot(S_u_u, U).mult(alpha)))
        psaiU = (manyDot([U.transpose(), M_u_u, U, H1.transpose()])
                 .add(manyDot([U.transpose(), M_u_f, W, H3.transpose()]))
                 .subtract(manyDot([H1, U.transpose(), U, H1.transpose()]))
                 .subtract(manyDot([H3, W.transpose(), W, H3.transpose()]))
                 .subtract(manyDot([U.transpose(), L_u, U]).mult(alpha)))
        down = (manyDot([U, H1, U.transpose(), U, H1.transpose()])
                .add(manyDot([U, H3, W.transpose(), W, H3.transpose()]))
                .add(cm.dot(S_u_u_D, U).mult(alpha))
                .add(cm.dot(U, psaiU)))
        # add a tiny value so the divide never hits an exact zero
        size = down.shape
        plus = cm.CUDAMatrix(np.ones(size) * pvalue)
        down.add(plus)
        # both multiply/divide and sqrt are element-wise
        up.divide(down)
        # clamp negative entries to zero on the CPU
        up_cpu = up.asarray()
        up.free_device_memory()
        up_cpu[up_cpu < 0] = 0
        up = cm.CUDAMatrix(up_cpu)
        U.mult(cm.sqrt(up))
        up.free_device_memory()
        psaiU.free_device_memory()
        down.free_device_memory()
        plus.free_device_memory()
        #print(M_u_u.asarray())

        # update V
        up = manyDot([M_t_f, W, H2.transpose()]).add(cm.dot(S_t_t, V).mult(beta))
        psaiV = (manyDot([V.transpose(), M_t_f, W, H2.transpose()])
                 .subtract(manyDot([H2, W.transpose(), W, H2.transpose()]))
                 .subtract(manyDot([V.transpose(), L_t, V]).mult(beta)))
        down = (manyDot([V, H2, W.transpose(), W, H2.transpose()])
                .add(cm.dot(S_t_t_D, V).mult(beta))
                .add(cm.dot(V, psaiV)))
        size = down.shape
        plus = cm.CUDAMatrix(np.ones(size) * pvalue)
        down.add(plus)
        # print(down.asarray())
        # print(V.asarray())
        up.divide(down)
        up_cpu = up.asarray()
        up.free_device_memory()
        up_cpu[up_cpu < 0] = 0
        up = cm.CUDAMatrix(up_cpu)
        V.mult(cm.sqrt(up))
        #print(V.asarray())
        up.free_device_memory()
        psaiV.free_device_memory()
        down.free_device_memory()
        plus.free_device_memory()

        # update W
        up = manyDot([M_t_f.transpose(), V, H2]).add(manyDot([M_u_f.transpose(), U, H3]))
        down = (manyDot([W, H2.transpose(), V.transpose(), V, H2])
                .add(manyDot([W, H3.transpose(), U.transpose(), U, H3])))
        size = down.shape
        plus = cm.CUDAMatrix(np.ones(size) * pvalue)
        down.add(plus)
        W.mult(cm.sqrt(up.divide(down)))
        up.free_device_memory()
        down.free_device_memory()
        plus.free_device_memory()

        # update H1
        up = manyDot([U.transpose(), M_u_u, U])
        down = manyDot([U.transpose(), U, H1, U.transpose(), U])
        size = down.shape
        plus = cm.CUDAMatrix(np.ones(size) * pvalue)
        down.add(plus)
        #print(H1)
        H1.mult(cm.sqrt(up.divide(down)))
        #print(H1)
        up.free_device_memory()
        down.free_device_memory()
        plus.free_device_memory()

        # update H2
        up = manyDot([V.transpose(), M_t_f, W])
        down = manyDot([V.transpose(), V, H2, W.transpose(), W])
        size = down.shape
        plus = cm.CUDAMatrix(np.ones(size) * pvalue)
        down.add(plus)
        H2.mult(cm.sqrt(up.divide(down)))
        up.free_device_memory()
        down.free_device_memory()
        plus.free_device_memory()

        # update H3
        up = manyDot([U.transpose(), M_u_f, W])
        down = manyDot([U.transpose(), U, H3, W.transpose(), W])
        size = down.shape
        plus = cm.CUDAMatrix(np.ones(size) * pvalue)
        down.add(plus)
        H3.mult(cm.sqrt(up.divide(down)))
        up.free_device_memory()
        down.free_device_memory()
        plus.free_device_memory()

        step = step + 1
        print('step: ' + str(step))
        purity = per.dealWith(U.asarray())
        if purity > maxPurity:
            #print('ex')
            maxPurity = purity
            U_c = U.copy()
            maxU = U_c.asarray()
            U_c.free_device_memory()
            # a plain maxU = U.asarray() would keep tracking U on the GPU

    t = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    #print(t)
    np.save('hashtag/U/U-' + t.replace(' ', '-') + '.npy', maxU)
    print('Max Purity during this process: ' + str(maxPurity))
def NMFsemi(X, r, iterations=1000, G=None, niter_test_conv=10, stop_threshold=40):
    n = np.size(X, 0)
    m = np.size(X, 1)

    if G is None:
        G = np.random.random((m, r)).astype(np.float32)
    elif G.strides[1] > G.strides[0]:
        # check whether the data array is contiguous and not the transpose of
        # some other array, which would be hard to process (strides problem)
        G = G.copy()

    # allocate the matrices on the GPU
    G_gpu = cm.CUDAMatrix(G)
    F_gpu = cm.empty((n, r))
    X_gpu = cm.CUDAMatrix(X)
    GTG_gpu = cm.empty((r, r))
    GTGpinv_gpu = cm.empty((r, r))
    XG_gpu = cm.empty((n, r))
    XTF_gpu = cm.empty((m, r))
    FTF_gpu = cm.empty((r, r))
    XTFgreater_gpu = cm.empty((m, r))
    FTFgreater_gpu = cm.empty((r, r))
    XTFpos_gpu = cm.empty((m, r))
    XTFneg_gpu = cm.empty((m, r))
    FTFpos_gpu = cm.empty((r, r))
    FTFneg_gpu = cm.empty((r, r))
    GFTFneg_gpu = cm.empty((m, r))
    GFTFpos_gpu = cm.empty((m, r))

    const = 0
    oldExposures = np.argmax(G, axis=0)

    for i in range(iterations):
        # F = XG(G.T G)^-1
        cm.dot(G_gpu.T, G_gpu, target=GTG_gpu)
        try:
            GTGpinv_gpu = cm.CUDAMatrix(np.linalg.inv(GTG_gpu.asarray()))
        except LinAlgError:
            GTGpinv_gpu = cm.CUDAMatrix(np.linalg.pinv(GTG_gpu.asarray()))
        cm.dot(X_gpu, G_gpu, target=XG_gpu)
        cm.dot(XG_gpu, GTGpinv_gpu, target=F_gpu)

        # preparation and calculation of the matrix separations
        cm.dot(X_gpu.T, F_gpu, target=XTF_gpu)
        cm.dot(F_gpu.T, F_gpu, target=FTF_gpu)
        XTF_gpu.greater_than(0, target=XTFgreater_gpu)
        XTF_gpu.mult(XTFgreater_gpu, target=XTFpos_gpu)
        XTFpos_gpu.subtract(XTF_gpu, target=XTFneg_gpu)
        FTF_gpu.greater_than(0, target=FTFgreater_gpu)
        FTF_gpu.mult(FTFgreater_gpu, target=FTFpos_gpu)
        FTFpos_gpu.subtract(FTF_gpu, target=FTFneg_gpu)

        # compute the G update
        cm.dot(G_gpu, FTFpos_gpu, target=GFTFpos_gpu)
        cm.dot(G_gpu, FTFneg_gpu, target=GFTFneg_gpu)
        XTFpos_gpu.add(GFTFneg_gpu)
        XTFneg_gpu.add(GFTFpos_gpu)
        XTFpos_gpu.divide(XTFneg_gpu)
        cm.sqrt(XTFpos_gpu)
        G_gpu.mult(XTFpos_gpu)

        # test for convergence
        if i % niter_test_conv == 0:
            newExpo = np.argmax(G_gpu.asarray(), axis=0)
            if (oldExposures != newExpo).any():
                oldExposures = newExpo
                const = 0
            else:
                const += 1
                if const == stop_threshold:
                    print "NMF converged after %i iterations" % i
                    break

    return F_gpu.asarray(), G_gpu.asarray().T
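# A usage sketch for NMFsemi (not from the original source); it assumes an
# initialized cudamat context. F has shape (n, r) and G comes back as (r, m),
# so F.dot(G) reconstructs X.
import numpy as np
import cudamat as cm

cm.cublas_init()
X = np.random.rand(200, 50).astype(np.float32)
F, G = NMFsemi(X, 5)
print(np.linalg.norm(X - F.dot(G)))  # reconstruction error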
def NMFconvex(X, r, iterations=1000, G=None, niter_test_conv=10, stop_threshold=40):
    n = np.size(X, 0)
    m = np.size(X, 1)

    if G is None:
        # TODO: implement k-means initialization
        G = np.random.random((m, r)).astype(np.float32)
        W = np.random.random((m, r)).astype(np.float32)
    else:
        G += 0.2
        Wi = np.dot(G, np.linalg.inv(np.dot(G.T, G)))
        Wipos = (np.abs(Wi) + Wi) / 2
        W = Wipos + 0.2 * np.sum(np.abs(Wi)) / Wi.size

    G_gpu = cm.CUDAMatrix(G)
    W_gpu = cm.CUDAMatrix(W)
    X_gpu = cm.CUDAMatrix(X)

    XTX_gpu = cm.dot(X_gpu.T, X_gpu)
    XTXpos_gpu = cm.empty((m, m))
    XTX_gpu.greater_than(0, target=XTXpos_gpu)
    XTXpos_gpu.mult(XTX_gpu)
    XTXneg_gpu = cm.empty((m, m))
    XTXpos_gpu.subtract(XTX_gpu, target=XTXneg_gpu)

    XTXnegW_gpu = cm.empty((m, r))
    XTXposW_gpu = cm.empty((m, r))
    GWT_gpu = cm.empty((m, m))
    update1_gpu = cm.empty((m, r))
    update2_gpu = cm.empty((m, r))
    GTG_gpu = cm.empty((r, r))
    XTXnegG_gpu = cm.empty((m, r))
    XTXposG_gpu = cm.empty((m, r))

    const = 0
    oldExposures = np.argmax(G, axis=1)

    for i in range(0, iterations):
        cm.dot(XTXneg_gpu, W_gpu, target=XTXnegW_gpu)
        cm.dot(XTXpos_gpu, W_gpu, target=XTXposW_gpu)

        # Update G
        cm.dot(G_gpu, W_gpu.T, target=GWT_gpu)
        # G *= np.sqrt((XTXposW + np.dot(GWT, XTXnegW))
        #              / (XTXnegW + np.dot(GWT, XTXposW)))
        cm.dot(GWT_gpu, XTXnegW_gpu, target=update1_gpu)
        cm.dot(GWT_gpu, XTXposW_gpu, target=update2_gpu)
        update1_gpu.add(XTXposW_gpu)
        update2_gpu.add(XTXnegW_gpu)
        update1_gpu.divide(update2_gpu)
        cm.sqrt(update1_gpu)
        G_gpu.mult(update1_gpu)

        # Update W
        cm.dot(G_gpu.T, G_gpu, target=GTG_gpu)
        # W *= np.sqrt((np.dot(XTXpos, G) + np.dot(XTXnegW, GTG))
        #              / (np.dot(XTXneg, G) + np.dot(XTXposW, GTG)))
        cm.dot(XTXpos_gpu, G_gpu, target=XTXposG_gpu)
        cm.dot(XTXneg_gpu, G_gpu, target=XTXnegG_gpu)
        cm.dot(XTXnegW_gpu, GTG_gpu, target=update1_gpu)
        cm.dot(XTXposW_gpu, GTG_gpu, target=update2_gpu)
        update1_gpu.add(XTXposG_gpu)
        update2_gpu.add(XTXnegG_gpu)
        update1_gpu.divide(update2_gpu)
        cm.sqrt(update1_gpu)
        W_gpu.mult(update1_gpu)

        if i % niter_test_conv == 0:
            newExpo = np.argmax(G_gpu.asarray(), axis=1)
            if (oldExposures != newExpo).any():
                oldExposures = newExpo
                const = 0
            else:
                const += 1
                if const == stop_threshold:
                    print "NMF converged after %i iterations" % i
                    break

    return cm.dot(X_gpu, W_gpu).asarray(), G_gpu.asarray().T
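# The same spot-check for NMFconvex (not from the original source): the first
# return value is XW, already the (n, r) factor, and G comes back as (r, m).
X = np.random.rand(200, 50).astype(np.float32)
F, G = NMFconvex(X, 5)
print(np.linalg.norm(X - F.dot(G)))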
import cudamat as cm
import numpy as np

cm.cuda_set_device(0)
cm.cublas_init()

t = np.load('/home/scw4750/frelam_20161027/get_feature/data/feature_0w-5w.npy')
t.dtype = '<f'
feat = t[0:40000]
print t

a = cm.CUDAMatrix(feat)
c = cm.dot(a, a.T)
e = cm.sqrt(c)
e = e.asarray()
#e.dtype = 'float'
print len(e)

dioa = None
for index, item in enumerate(e):
    if dioa is None:
        temp = np.array(item[index])
        dioa = np.copy(temp)
    else:
        temp = np.array(item[index])
        dioa = np.vstack((dioa, temp))

feat = t[40000:50000]
a = cm.CUDAMatrix(feat)
c = cm.dot(a, a.T)
e_2 = cm.sqrt(c)
e_2 = e_2.asarray()
print len(e_2)

for index, item in enumerate(e_2):
    # the source breaks off here; presumably the diagonal is stacked onto
    # dioa exactly as in the loop above
    temp = np.array(item[index])
    dioa = np.vstack((dioa, temp))
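# Note (not from the original source): the loops above only keep the diagonal
# of the Gram matrix, i.e. the per-row L2 norms. The same column vector can be
# computed directly, without forming the full 40000 x 40000 product:
norms = np.sqrt((feat * feat).sum(axis=1, keepdims=True))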
def train(self):
    '''
    Main train function : modified version of the original train function.

    Additions :
        GPU selection (useful for multi-GPU machines)
        Saving the sum of the squares of the data for post-processing
        Visible data are saved
        Data samples are permuted for training
        Weights are saved every 100 training epochs
        Training energy is visualized every 100 training epochs

    NOTE : the annealed learning rate used in the initial code is NOT used here!
    '''
    #plt.ion()
    f1 = plt.figure()
    ax1 = f1.add_subplot(111)
    #ax2 = f1.add_subplot(122)
    #plt.show()

    cmt.cuda_set_device(self.gpuId)
    cmt.cublas_init()
    cmt.CUDAMatrix.init_random(1)

    np.random.seed(self.npRandSeed)
    prng = RandomState(self.npRandState)

    ################################################################
    ##################### CHANGE PATH ##############################
    # Move to current experiment path:
    os.chdir(self.saveDir)
    # Get current path:
    os.getcwd()

    self.plotsDir = 'plots'
    #self.probabilitiesDir = 'p_all'
    if not os.path.isdir(self.plotsDir):
        os.makedirs(self.plotsDir)
    if not os.path.isdir(self.plotsDir + '/energy'):
        os.makedirs(self.plotsDir + '/energy')
    #if not os.path.isdir(self.probabilitiesDir):
    #    os.makedirs(self.probabilitiesDir)
    if not os.path.isdir('weights'):
        os.makedirs('weights')

    d = self.d.astype(np.float32)
    print("visible size: ", d.shape)

    dsq = np.square(d)
    lsq = np.sum(dsq, axis=0)
    with open('lsqComplete.pkl', 'wb') as pklFile:
        cPickle.dump(lsq, pklFile)
    del dsq, lsq

    # Save visible data :
    visData = d
    np.savez('visData.npz', data=d, obsKeys=self.obsKeys, epochTime=self.epochTime)

    with open('visData.txt', 'w') as f:
        f.write("\n Dataset : %s" % (self.dataFilename))
        f.write("\n visData size: %s " % str(visData.shape))
        f.write("\n visData type: %s " % str(visData.dtype))
        f.write("\n \n visData Range: %s " % str(np.max(visData, axis=0) - np.min(visData, axis=0)))
        f.write("\n \n visData min: %s " % str(np.min(visData, axis=0)))
        f.write("\n \n visData max: %s " % str(np.max(visData, axis=0)))
        f.write("\n \n visData mean: %s " % str(np.mean(visData, axis=0)))
        f.write("\n \n visData std: %s " % str(np.std(visData, axis=0)))

    del visData  # if not needed for computing the latent states

    permIdx = prng.permutation(d.shape[0])
    d = d[permIdx, :]

    # subsetting train and test datasets
    #trainPerc = 0.7
    #trainSampNum = int(np.ceil(trainPerc*d.shape[0]))
    #trainSampNum = int(np.floor(trainSampNum/self.batch_size)*self.batch_size)
    #testSampNum = int(d.shape[0]-trainSampNum-1)

    # The test dataset is not used at the moment; it can be used as a
    # validation set to check for overfitting. To use it, uncomment all the
    # variables with 'test' in their name.
    #~ d_test = d[trainSampNum+1:,:]
    #d = d[:trainSampNum,:]
    #obsKeys = self.obsKeys[:trainSampNum]

    totnumcases = d.shape[0]
    num_vis = d.shape[1]
    num_batches = int(totnumcases / self.batch_size)
    print("num_batches: ", num_batches)

    dev_dat = cmt.CUDAMatrix(d.T)  # VxP
    #~ test_dat = cmt.CUDAMatrix(d_test.T)

    del d, self.d, self.epochTime, self.obsKeys

    # training parameters (as in the original code by Ranzato)
    epsilon = self.epsilon
    epsilonVF = 2 * epsilon
    epsilonFH = 0.02 * epsilon
    epsilonb = 0.02 * epsilon
    epsilonw_mean = 0.2 * epsilon
    epsilonb_mean = 0.1 * epsilon
    weightcost_final = self.weightcost_final

    # HMC setting
    hmc_step_nr = self.hmc_step_nr
    hmc_step = 0.01
    hmc_target_ave_rej = self.hmc_target_ave_rej
    hmc_ave_rej = hmc_target_ave_rej

    # initialize weights
    VF = cmt.CUDAMatrix(np.array(0.02 * prng.randn(num_vis, self.num_fac), dtype=np.float32, order='F'))  # VxH
    if self.apply_mask == 0:
        FH = cmt.CUDAMatrix(np.array(np.eye(self.num_fac, self.num_hid_cov), dtype=np.float32, order='F'))  # HxO
    else:
        # see CVPR2010paper_material/topo2D_3x3_stride2_576filt.mat for an example
        dd = loadmat('your_FHinit_mask_file.mat')
        FH = cmt.CUDAMatrix(np.array(dd["FH"], dtype=np.float32, order='F'))
    bias_cov = cmt.CUDAMatrix(np.array(2.0 * np.ones((self.num_hid_cov, 1)), dtype=np.float32, order='F'))
    bias_vis = cmt.CUDAMatrix(np.array(np.zeros((num_vis, 1)), dtype=np.float32, order='F'))
    w_mean = cmt.CUDAMatrix(np.array(0.05 * prng.randn(num_vis, self.num_hid_mean), dtype=np.float32, order='F'))  # VxH
    bias_mean = cmt.CUDAMatrix(np.array(-2.0 * np.ones((self.num_hid_mean, 1)), dtype=np.float32, order='F'))

    # initialize variables to store derivatives
    VFinc = cmt.CUDAMatrix(np.array(np.zeros((num_vis, self.num_fac)), dtype=np.float32, order='F'))
    FHinc = cmt.CUDAMatrix(np.array(np.zeros((self.num_fac, self.num_hid_cov)), dtype=np.float32, order='F'))
    bias_covinc = cmt.CUDAMatrix(np.array(np.zeros((self.num_hid_cov, 1)), dtype=np.float32, order='F'))
    bias_visinc = cmt.CUDAMatrix(np.array(np.zeros((num_vis, 1)), dtype=np.float32, order='F'))
    w_meaninc = cmt.CUDAMatrix(np.array(np.zeros((num_vis, self.num_hid_mean)), dtype=np.float32, order='F'))
    bias_meaninc = cmt.CUDAMatrix(np.array(np.zeros((self.num_hid_mean, 1)), dtype=np.float32, order='F'))

    # initialize temporary storage
    data = cmt.CUDAMatrix(np.array(np.empty((num_vis, self.batch_size)), dtype=np.float32, order='F'))       # VxP
    normdata = cmt.CUDAMatrix(np.array(np.empty((num_vis, self.batch_size)), dtype=np.float32, order='F'))   # VxP
    negdataini = cmt.CUDAMatrix(np.array(np.empty((num_vis, self.batch_size)), dtype=np.float32, order='F')) # VxP
    feat = cmt.CUDAMatrix(np.array(np.empty((self.num_fac, self.batch_size)), dtype=np.float32, order='F'))
    featsq = cmt.CUDAMatrix(np.array(np.empty((self.num_fac, self.batch_size)), dtype=np.float32, order='F'))
    negdata = cmt.CUDAMatrix(np.array(prng.randn(num_vis, self.batch_size), dtype=np.float32, order='F'))
    old_energy = cmt.CUDAMatrix(np.array(np.zeros((1, self.batch_size)), dtype=np.float32, order='F'))
    new_energy = cmt.CUDAMatrix(np.array(np.zeros((1, self.batch_size)), dtype=np.float32, order='F'))
    energy = cmt.CUDAMatrix(np.array(np.zeros((1, self.batch_size)), dtype=np.float32, order='F'))
    gradient = cmt.CUDAMatrix(np.array(np.empty((num_vis, self.batch_size)), dtype=np.float32, order='F'))     # VxP
    normgradient = cmt.CUDAMatrix(np.array(np.empty((num_vis, self.batch_size)), dtype=np.float32, order='F')) # VxP
    thresh = cmt.CUDAMatrix(np.array(np.zeros((1, self.batch_size)), dtype=np.float32, order='F'))
    feat_mean = cmt.CUDAMatrix(np.array(np.empty((self.num_hid_mean, self.batch_size)), dtype=np.float32, order='F'))
    vel = cmt.CUDAMatrix(np.array(prng.randn(num_vis, self.batch_size), dtype=np.float32, order='F'))
    length = cmt.CUDAMatrix(np.array(np.zeros((1, self.batch_size)), dtype=np.float32, order='F'))    # 1xP
    lengthsq = cmt.CUDAMatrix(np.array(np.zeros((1, self.batch_size)), dtype=np.float32, order='F'))  # 1xP
    normcoeff = cmt.CUDAMatrix(np.array(np.zeros((1, self.batch_size)), dtype=np.float32, order='F')) # 1xP

    # commented to avoid computing the energy on test data
    #~ data_test = cmt.CUDAMatrix( np.array(np.empty((num_vis, testSampNum)), dtype=np.float32, order='F')) # Vxtest_batch
    #~ normdata_test = cmt.CUDAMatrix( np.array(np.empty((num_vis, testSampNum)), dtype=np.float32, order='F')) # Vxtest_batch
    #~ length_test = cmt.CUDAMatrix( np.array(np.zeros((1, testSampNum)), dtype=np.float32, order='F')) # 1xtest_batch
    #~ lengthsq_test = cmt.CUDAMatrix( np.array(np.zeros((1, testSampNum)), dtype=np.float32, order='F')) # 1xtest_batch
    #~ normcoeff_test = cmt.CUDAMatrix( np.array(np.zeros((1, testSampNum)), dtype=np.float32, order='F')) # 1xtest_batch
    #~ vel_test = cmt.CUDAMatrix( np.array(prng.randn(num_vis, testSampNum), dtype=np.float32, order='F'))
    #~ feat_test = cmt.CUDAMatrix( np.array(np.empty((self.num_fac, testSampNum)), dtype=np.float32, order='F'))
    #~ featsq_test = cmt.CUDAMatrix( np.array(np.empty((self.num_fac, testSampNum)), dtype=np.float32, order='F'))
    #~ feat_mean_test = cmt.CUDAMatrix( np.array(np.empty((self.num_hid_mean, testSampNum)), dtype=np.float32, order='F'))
    #~ energy_test = cmt.CUDAMatrix( np.array(np.zeros((1, testSampNum)), dtype=np.float32, order='F'))

    if self.apply_mask == 1:
        # this is used to constrain very large FH matrices, only allowing
        # values to change in a neighborhood
        dd = loadmat('your_FHinit_mask_file.mat')
        mask = cmt.CUDAMatrix(np.array(dd["mask"], dtype=np.float32, order='F'))
    normVF = 1
    small = 0.5

    # other temporary vars
    t1 = cmt.CUDAMatrix(np.array(np.empty((self.num_hid_cov, self.batch_size)), dtype=np.float32, order='F'))
    t2 = cmt.CUDAMatrix(np.array(np.empty((self.num_hid_cov, self.batch_size)), dtype=np.float32, order='F'))
    t3 = cmt.CUDAMatrix(np.array(np.empty((self.num_fac, self.batch_size)), dtype=np.float32, order='F'))
    t4 = cmt.CUDAMatrix(np.array(np.empty((1, self.batch_size)), dtype=np.float32, order='F'))
    t5 = cmt.CUDAMatrix(np.array(np.empty((1, 1)), dtype=np.float32, order='F'))
    t6 = cmt.CUDAMatrix(np.array(np.empty((num_vis, self.batch_size)), dtype=np.float32, order='F'))
    t7 = cmt.CUDAMatrix(np.array(np.empty((num_vis, self.batch_size)), dtype=np.float32, order='F'))
    t8 = cmt.CUDAMatrix(np.array(np.empty((num_vis, self.num_fac)), dtype=np.float32, order='F'))
    t9 = cmt.CUDAMatrix(np.array(np.zeros((self.num_fac, self.num_hid_cov)), dtype=np.float32, order='F'))
    t10 = cmt.CUDAMatrix(np.array(np.empty((1, self.num_fac)), dtype=np.float32, order='F'))
    t11 = cmt.CUDAMatrix(np.array(np.empty((1, self.num_hid_cov)), dtype=np.float32, order='F'))

    # commented to avoid computing the energy on test data
    #~ t1_test = cmt.CUDAMatrix( np.array(np.empty((self.num_hid_cov, testSampNum)), dtype=np.float32, order='F'))
    #~ t2_test = cmt.CUDAMatrix( np.array(np.empty((self.num_hid_cov, testSampNum)), dtype=np.float32, order='F'))
    #~ t3_test = cmt.CUDAMatrix( np.array(np.empty((self.num_fac, testSampNum)), dtype=np.float32, order='F'))
    #~ t4_test = cmt.CUDAMatrix( np.array(np.empty((1, testSampNum)), dtype=np.float32, order='F'))
    #~ t5_test = cmt.CUDAMatrix( np.array(np.empty((1, 1)), dtype=np.float32, order='F'))
    #~ t6_test = cmt.CUDAMatrix( np.array(np.empty((num_vis, testSampNum)), dtype=np.float32, order='F'))

    meanEnergy = np.zeros(self.num_epochs)
    minEnergy = np.zeros(self.num_epochs)
    maxEnergy = np.zeros(self.num_epochs)
    #~ meanEnergy_test = np.zeros(self.num_epochs)
    #~ minEnergy_test = np.zeros(self.num_epochs)
    #~ maxEnergy_test = np.zeros(self.num_epochs)

    # start training
    for epoch in range(self.num_epochs):
        print "Epoch " + str(epoch)

        # anneal learning rates as found in the original code -
        # uncomment if you wish to use annealing!
        #~ epsilonVFc = epsilonVF/max(1,epoch/20)
        #~ epsilonFHc = epsilonFH/max(1,epoch/20)
        #~ epsilonbc = epsilonb/max(1,epoch/20)
        #~ epsilonw_meanc = epsilonw_mean/max(1,epoch/20)
        #~ epsilonb_meanc = epsilonb_mean/max(1,epoch/20)
        # no annealing is used in our experiments because learning
        # was stopping too early
        epsilonVFc = epsilonVF
        epsilonFHc = epsilonFH
        epsilonbc = epsilonb
        epsilonw_meanc = epsilonw_mean
        epsilonb_meanc = epsilonb_mean

        weightcost = weightcost_final
        if epoch <= self.startFH:
            epsilonFHc = 0
        if epoch <= self.startwd:
            weightcost = 0

        # commented to avoid computing the energy on test data
        #~ data_test = test_dat
        #~ data_test.mult(data_test, target = t6_test) # DxP
        #~ t6_test.sum(axis = 0, target = lengthsq_test) # 1xP
        #~ lengthsq_test.mult(1./num_vis) # normalize by number of components (like std)
        #~ lengthsq_test.add(small) # small avoids division by 0
        #~ cmt.sqrt(lengthsq_test, target = length_test)
        #~ length_test.reciprocal(target = normcoeff_test) # 1xP
        #~ data_test.mult_by_row(normcoeff_test, target = normdata_test) # normalized data

        for batch in range(num_batches):
            # get current minibatch
            data = dev_dat.slice(batch * self.batch_size, (batch + 1) * self.batch_size)  # DxP (nr dims x nr samples)

            # normalize input data
            data.mult(data, target=t6)       # DxP
            t6.sum(axis=0, target=lengthsq)  # 1xP
            lengthsq.mult(1. / num_vis)      # normalize by number of components (like std)
            lengthsq.add(small)              # small avoids division by 0
            cmt.sqrt(lengthsq, target=length)
            length.reciprocal(target=normcoeff)           # 1xP
            data.mult_by_row(normcoeff, target=normdata)  # normalized data

            ## compute positive sample derivatives
            # covariance part
            cmt.dot(VF.T, normdata, target=feat)   # HxP (nr facs x nr samples)
            feat.mult(feat, target=featsq)         # HxP
            cmt.dot(FH.T, featsq, target=t1)       # OxP (nr cov hiddens x nr samples)
            t1.mult(-0.5)
            t1.add_col_vec(bias_cov)               # OxP
            t1.apply_sigmoid(target=t2)            # OxP
            cmt.dot(featsq, t2.T, target=FHinc)    # HxO
            cmt.dot(FH, t2, target=t3)             # HxP
            t3.mult(feat)
            cmt.dot(normdata, t3.T, target=VFinc)  # VxH
            t2.sum(axis=1, target=bias_covinc)
            bias_covinc.mult(-1)

            # visible bias
            data.sum(axis=1, target=bias_visinc)
            bias_visinc.mult(-1)

            # mean part
            cmt.dot(w_mean.T, data, target=feat_mean)  # HxP (nr mean hiddens x nr samples)
            feat_mean.add_col_vec(bias_mean)           # HxP
            feat_mean.apply_sigmoid()                  # HxP
            feat_mean.mult(-1)
            cmt.dot(data, feat_mean.T, target=w_meaninc)
            feat_mean.sum(axis=1, target=bias_meaninc)

            # HMC sampling: draw an approximate sample from the model
            if self.doPCD == 0:
                # CD-1 (set negative data to current training samples)
                hmc_step, hmc_ave_rej = self.draw_HMC_samples(
                    data, negdata, normdata, vel, gradient, normgradient,
                    new_energy, old_energy, VF, FH, bias_cov, bias_vis, w_mean,
                    bias_mean, hmc_step, hmc_step_nr, hmc_ave_rej,
                    hmc_target_ave_rej, t1, t2, t3, t4, t5, t6, t7, thresh,
                    feat, featsq, self.batch_size, feat_mean, length, lengthsq,
                    normcoeff, small, num_vis)
            else:
                # PCD-1 (use previous negative data as starting point for chain)
                negdataini.assign(negdata)
                hmc_step, hmc_ave_rej = self.draw_HMC_samples(
                    negdataini, negdata, normdata, vel, gradient, normgradient,
                    new_energy, old_energy, VF, FH, bias_cov, bias_vis, w_mean,
                    bias_mean, hmc_step, hmc_step_nr, hmc_ave_rej,
                    hmc_target_ave_rej, t1, t2, t3, t4, t5, t6, t7, thresh,
                    feat, featsq, self.batch_size, feat_mean, length, lengthsq,
                    normcoeff, small, num_vis)

            # compute derivatives at the negative samples
            # normalize input data
            negdata.mult(negdata, target=t6)  # DxP
            t6.sum(axis=0, target=lengthsq)   # 1xP
            lengthsq.mult(1. / num_vis)       # normalize by number of components (like std)
            lengthsq.add(small)
            cmt.sqrt(lengthsq, target=length)
            length.reciprocal(target=normcoeff)              # 1xP
            negdata.mult_by_row(normcoeff, target=normdata)  # normalized data

            # covariance part
            cmt.dot(VF.T, normdata, target=feat)  # HxP
            feat.mult(feat, target=featsq)        # HxP
            cmt.dot(FH.T, featsq, target=t1)      # OxP
            t1.mult(-0.5)
            t1.add_col_vec(bias_cov)              # OxP
            t1.apply_sigmoid(target=t2)           # OxP
            FHinc.subtract_dot(featsq, t2.T)      # HxO
            FHinc.mult(0.5)
            cmt.dot(FH, t2, target=t3)            # HxP
            t3.mult(feat)
            VFinc.subtract_dot(normdata, t3.T)    # VxH
            bias_covinc.add_sums(t2, axis=1)

            # visible bias
            bias_visinc.add_sums(negdata, axis=1)

            # mean part
            cmt.dot(w_mean.T, negdata, target=feat_mean)  # HxP
            feat_mean.add_col_vec(bias_mean)              # HxP
            feat_mean.apply_sigmoid()                     # HxP
            w_meaninc.add_dot(negdata, feat_mean.T)
            bias_meaninc.add_sums(feat_mean, axis=1)

            # update parameters
            VFinc.add_mult(VF.sign(), weightcost)  # L1 regularization
            VF.add_mult(VFinc, -epsilonVFc / self.batch_size)
            # normalize columns of VF: normalize by running average of their norm
            VF.mult(VF, target=t8)
            t8.sum(axis=0, target=t10)
            cmt.sqrt(t10)
            t10.sum(axis=1, target=t5)
            t5.copy_to_host()
            normVF = .95 * normVF + (.05 / self.num_fac) * t5.numpy_array[0, 0]  # estimate norm
            t10.reciprocal()
            VF.mult_by_row(t10)
            VF.mult(normVF)
            bias_cov.add_mult(bias_covinc, -epsilonbc / self.batch_size)
            bias_vis.add_mult(bias_visinc, -epsilonbc / self.batch_size)

            if epoch > self.startFH:
                FHinc.add_mult(FH.sign(), weightcost)              # L1 regularization
                FH.add_mult(FHinc, -epsilonFHc / self.batch_size)  # update
                # set to 0 negative entries in FH
                FH.greater_than(0, target=t9)
                FH.mult(t9)
                if self.apply_mask == 1:
                    FH.mult(mask)
                # normalize columns of FH: L1 norm set to 1 in each column
                FH.sum(axis=0, target=t11)
                t11.reciprocal()
                FH.mult_by_row(t11)

            w_meaninc.add_mult(w_mean.sign(), weightcost)
            w_mean.add_mult(w_meaninc, -epsilonw_meanc / self.batch_size)
            bias_mean.add_mult(bias_meaninc, -epsilonb_meanc / self.batch_size)

            if self.verbose == 1:
                print "VF: " + '%3.2e' % VF.euclid_norm() \
                    + ", DVF: " + '%3.2e' % (VFinc.euclid_norm() * (epsilonVFc / self.batch_size)) \
                    + ", FH: " + '%3.2e' % FH.euclid_norm() \
                    + ", DFH: " + '%3.2e' % (FHinc.euclid_norm() * (epsilonFHc / self.batch_size)) \
                    + ", bias_cov: " + '%3.2e' % bias_cov.euclid_norm() \
                    + ", Dbias_cov: " + '%3.2e' % (bias_covinc.euclid_norm() * (epsilonbc / self.batch_size)) \
                    + ", bias_vis: " + '%3.2e' % bias_vis.euclid_norm() \
                    + ", Dbias_vis: " + '%3.2e' % (bias_visinc.euclid_norm() * (epsilonbc / self.batch_size)) \
                    + ", wm: " + '%3.2e' % w_mean.euclid_norm() \
                    + ", Dwm: " + '%3.2e' % (w_meaninc.euclid_norm() * (epsilonw_meanc / self.batch_size)) \
                    + ", bm: " + '%3.2e' % bias_mean.euclid_norm() \
                    + ", Dbm: " + '%3.2e' % (bias_meaninc.euclid_norm() * (epsilonb_meanc / self.batch_size)) \
                    + ", step: " + '%3.2e' % hmc_step \
                    + ", rej: " + '%3.2e' % hmc_ave_rej
                with open('terminal.txt', 'a') as f:
                    f.write('\n' + "epoch: %s" % str(epoch)
                            + ", VF: " + '%3.2e' % VF.euclid_norm()
                            + ", DVF: " + '%3.2e' % (VFinc.euclid_norm() * (epsilonVFc / self.batch_size))
                            + ", FH: " + '%3.2e' % FH.euclid_norm()
                            + ", DFH: " + '%3.2e' % (FHinc.euclid_norm() * (epsilonFHc / self.batch_size))
                            + ", bias_cov: " + '%3.2e' % bias_cov.euclid_norm()
                            + ", Dbias_cov: " + '%3.2e' % (bias_covinc.euclid_norm() * (epsilonbc / self.batch_size))
                            + ", bias_vis: " + '%3.2e' % bias_vis.euclid_norm()
                            + ", Dbias_vis: " + '%3.2e' % (bias_visinc.euclid_norm() * (epsilonbc / self.batch_size))
                            + ", wm: " + '%3.2e' % w_mean.euclid_norm()
                            + ", Dwm: " + '%3.2e' % (w_meaninc.euclid_norm() * (epsilonw_meanc / self.batch_size))
                            + ", bm: " + '%3.2e' % bias_mean.euclid_norm()
                            + ", Dbm: " + '%3.2e' % (bias_meaninc.euclid_norm() * (epsilonb_meanc / self.batch_size))
                            + ", step: " + '%3.2e' % hmc_step
                            + ", rej: " + '%3.2e' % hmc_ave_rej)
                sys.stdout.flush()

        # compute the energy on the training data
        self.compute_energy_mcRBM_visual(data, normdata, energy, VF, FH,
                                         bias_cov, bias_vis, w_mean, bias_mean,
                                         t1, t2, t6, feat, featsq, feat_mean,
                                         length, lengthsq, normcoeff, small,
                                         num_vis)
        energy.copy_to_host()
        meanEnergy[epoch] = np.mean(energy.numpy_array)
        minEnergy[epoch] = np.min(energy.numpy_array)
        maxEnergy[epoch] = np.max(energy.numpy_array)

        # commented to avoid computing the energy on test data
        #~ self.compute_energy_mcRBM_visual(data_test,normdata_test,energy_test,VF,FH,bias_cov,bias_vis,w_mean,bias_mean,t1_test,t2_test,t6_test,feat_test,featsq_test,feat_mean_test,length_test,lengthsq_test,normcoeff_test,small,num_vis)
        #~ energy_test.copy_to_host()
        #~ meanEnergy_test[epoch] = np.mean(energy_test.numpy_array)
        #~ minEnergy_test[epoch] = np.min(energy_test.numpy_array)
        #~ maxEnergy_test[epoch] = np.max(energy_test.numpy_array)

        ax1.cla()
        ax1.plot(range(epoch), meanEnergy[0:epoch])
        ax1.plot(range(epoch), maxEnergy[0:epoch])
        ax1.plot(range(epoch), minEnergy[0:epoch])
        if np.mod(epoch, 100) == 0:
            #f1.savefig(output_folder + str(epoch)+'_'+'fig.png')
            f1.savefig(self.plotsDir + '/energy/energyAt_%s.png' % str(epoch))

        # back-up every once in a while
        if np.mod(epoch, 100) == 0:
            VF.copy_to_host()
            FH.copy_to_host()
            bias_cov.copy_to_host()
            w_mean.copy_to_host()
            bias_mean.copy_to_host()
            bias_vis.copy_to_host()
            savemat("./weights/ws_temp%s" % str(epoch), {
                'VF': VF.numpy_array, 'FH': FH.numpy_array,
                'bias_cov': bias_cov.numpy_array, 'bias_vis': bias_vis.numpy_array,
                'w_mean': w_mean.numpy_array, 'bias_mean': bias_mean.numpy_array,
                'epoch': epoch})
            # uncomment if computing the energy in order to store its evolution throughout training
            #~ savemat(self.refDir + '/' + "training_energy_" + str(self.num_fac) + "_cov" + str(self.num_hid_cov) + "_mean" + str(self.num_hid_mean), {'meanEnergy':meanEnergy,'meanEnergy_test':meanEnergy_test,'maxEnergy': maxEnergy, 'maxEnergy_test': maxEnergy_test, 'minEnergy': minEnergy, 'minEnergy_test': minEnergy_test, 'epoch':epoch})
            #savemat("training_energy_" + str(self.num_fac) + "_cov" + str(self.num_hid_cov) + "_mean" + str(self.num_hid_mean), {'meanEnergy':meanEnergy, 'maxEnergy': maxEnergy, 'minEnergy': minEnergy, 'epoch':epoch})

        # in order to stop the training gracefully, create an empty file
        # named 'stop_now' in the folder containing the experiment
        # configuration file
        if os.path.isfile('stop_now'):
            break

    # final back-up
    VF.copy_to_host()
    FH.copy_to_host()
    bias_cov.copy_to_host()
    bias_vis.copy_to_host()
    w_mean.copy_to_host()
    bias_mean.copy_to_host()
    savemat("ws_fac%s" % str(self.num_fac) + "_cov%s" % str(self.num_hid_cov)
            + "_mean%s" % str(self.num_hid_mean), {
        'VF': VF.numpy_array, 'FH': FH.numpy_array,
        'bias_cov': bias_cov.numpy_array, 'bias_vis': bias_vis.numpy_array,
        'w_mean': w_mean.numpy_array, 'bias_mean': bias_mean.numpy_array,
        'epoch': epoch})

    # uncomment if computing the energy in order to store its evolution throughout training
    #~ savemat(self.refDir + '/' + "training_energy_" + str(self.num_fac) + "_cov" + str(self.num_hid_cov) + "_mean" + str(self.num_hid_mean), {'meanEnergy':meanEnergy,'meanEnergy_test':meanEnergy_test,'maxEnergy': maxEnergy, 'maxEnergy_test': maxEnergy_test, 'minEnergy': minEnergy, 'minEnergy_test': minEnergy_test, 'epoch':epoch})
    savemat("training_energy_" + str(self.num_fac) + "_cov" + str(self.num_hid_cov)
            + "_mean" + str(self.num_hid_mean), {
        'meanEnergy': meanEnergy, 'maxEnergy': maxEnergy,
        'minEnergy': minEnergy, 'epoch': epoch})

    # Compute states if desired:
    # normalise data for covariance hidden:
    #dsq = np.square(visData)
    #lsq = np.sum(dsq, axis=0)
    #lsq /= visData.shape[1]
    #lsq += np.spacing(1)
    #l = np.sqrt(lsq)
    #normD = visData/l
    #logisticArg_c = (-0.5*np.dot(FH.numpy_array.T, np.square(np.dot(VF.numpy_array.T, normD.T))) + bias_cov.numpy_array).T
    #p_hc = logisticFunc(logisticArg_c)
    #logisticArg_m = np.dot(visData, w_mean.numpy_array) + bias_mean.numpy_array.T
    #p_hm = logisticFunc(logisticArg_m)
    #p_all = np.concatenate((p_hc, p_hm), axis=1)
    #savemat(self.probabilitiesDir + '/pAll_%i.mat' % epoch, mdict={'p_all':p_all})

    with open('done', 'w') as doneFile:
        doneFile.write(datetime.strftime(datetime.now(), '%d/%m/%Y %H:%M:%S'))