# Shared imports assumed throughout this section (pycuda + scikit-cuda).
# `cufft` in the snippets below is assumed to be a thin wrapper exposing
# fft_Z2Z/ifft_Z2Z in-place double-precision FFTs (e.g. built on skcuda.cufft);
# sapply, nanValue and geo_projection_to_cvx_cmb are helpers defined elsewhere.
import time
from functools import reduce
from types import FunctionType

import numpy as np
import pycuda.autoinit
from pycuda import gpuarray
from pycuda.compiler import SourceModule
from pycuda.elementwise import ElementwiseKernel
from pycuda.reduction import ReductionKernel
from pycuda.tools import dtype_to_ctype
import skcuda.linalg as linalg
import skcuda.linalg as cu_linalg
import skcuda.misc as misc


# v0 and w0 are initial factor matrices assumed to be defined at module scope.
def NNMF_gpu(X, r, tol, V=v0, W=w0, verbose=1):
    Vr = V[:, 0:r].copy()
    Wr = W[0:r, :].copy()
    X_gpu = gpuarray.to_gpu(X)
    V_gpu = gpuarray.to_gpu(Vr)
    W_gpu = gpuarray.to_gpu(Wr)

    # Frobenius norm at the previous step
    B_gpu = linalg.dot(V_gpu, W_gpu)
    L = linalg.norm(X_gpu - B_gpu) ** 2

    iteration = 0
    while True:
        # multiplicative update of V
        V_gpu *= linalg.dot(X_gpu, linalg.transpose(W_gpu))
        V_gpu /= linalg.dot(B_gpu, linalg.transpose(W_gpu))
        B_gpu = linalg.dot(V_gpu, W_gpu)

        # multiplicative update of W
        W_gpu *= linalg.dot(linalg.transpose(V_gpu), X_gpu)
        W_gpu /= linalg.dot(linalg.transpose(V_gpu), B_gpu)
        B_gpu = linalg.dot(V_gpu, W_gpu)

        Lnew = linalg.norm(X_gpu - B_gpu) ** 2
        if abs(Lnew - L) <= tol * (L + 1):
            break
        L = Lnew
        iteration += 1
        if verbose and iteration % 50 == 0:
            print("At iteration %i, the loss is %.2f" % (iteration, L))

    return V_gpu, W_gpu, iteration
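# A minimal usage sketch for NNMF_gpu; the shapes, tolerance, and factor names
# below are hypothetical. Since Python evaluates default arguments at definition
# time, v0/w0 must already exist when the def above runs; here the initial
# factors are passed explicitly instead.
m, n, r = 256, 128, 16
X0 = np.random.rand(m, n).astype(np.float32)     # nonnegative data matrix
V0 = np.random.rand(m, r).astype(np.float32)     # random nonnegative factors
W0 = np.random.rand(r, n).astype(np.float32)
linalg.init()
V_out, W_out, n_iter = NNMF_gpu(X0, r, tol=1e-4, V=V0, W=W0)
print("stopped after %i iterations, final loss %.4f"
      % (n_iter, np.linalg.norm(X0 - V_out.get().dot(W_out.get())) ** 2))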
def computeLambda(self):
    print('\t\tComputing lambda...')

    T = np.zeros(self.num_columns)

    if self.GPU:
        if not self.affine:
            gpu_data = gpuarray.to_gpu(self.data)
            C_gpu = linalg.dot(gpu_data, gpu_data, transa='T')
            for i in range(self.num_columns):
                T[i] = linalg.norm(C_gpu[i, :])
        else:
            gpu_data = gpuarray.to_gpu(self.data)

            # affine transformation
            y_mean_gpu = misc.mean(gpu_data, axis=1)

            # build the affine matrix to subtract from the data
            # (may encounter problems with strides)
            aff_mat = np.zeros([self.num_rows, self.num_columns]).astype('f')
            for i in range(self.num_columns):
                aff_mat[:, i] = y_mean_gpu.get()

            aff_mat_gpu = gpuarray.to_gpu(aff_mat)
            gpu_data_aff = misc.subtract(aff_mat_gpu, gpu_data)
            C_gpu = linalg.dot(gpu_data, gpu_data_aff, transa='T')

            # compute the Euclidean norm of each row
            for i in range(self.num_columns):
                T[i] = linalg.norm(C_gpu[i, :])
    else:
        if not self.affine:
            T = np.linalg.norm(np.dot(self.data.T, self.data), axis=1)
        else:
            # affine transformation
            y_mean = np.mean(self.data, axis=1)
            tmp_mat = np.outer(y_mean, np.ones(self.num_columns)) - self.data
            T = np.linalg.norm(np.dot(self.data.T, tmp_mat), axis=1)

    _lambda = np.amax(T)
    return _lambda
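# As the CPU branch above makes explicit, computeLambda evaluates
# lambda = max_i ||(X^T X)_{i,:}||_2 (or the mean-centered variant on the
# affine branch). A quick numpy cross-check with hypothetical data:
X = np.random.rand(50, 20).astype(np.float32)
T_ref = np.linalg.norm(np.dot(X.T, X), axis=1)
print("lambda =", np.amax(T_ref))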
def skcuda_linalg(a, b):
    linalg.init()
    a = np.asarray(a, np.float32)
    b = np.asarray(b, np.float32)
    a_gpu = gpuarray.to_gpu(a)
    b_gpu = gpuarray.to_gpu(b)

    # a^T b, scaled by the product of the Frobenius norms of a and b
    c_gpu = linalg.dot(a_gpu, b_gpu, 'T')
    a_nrm = linalg.norm(a_gpu)
    b_nrm = linalg.norm(b_gpu)

    # the norms are host-side scalars, so plain division is used here rather
    # than misc.divide, which expects two GPU arrays
    ans = c_gpu / (a_nrm * b_nrm)
    print(ans)
    return ans
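# Hedged usage sketch: compare the GPU result against the same computation in
# numpy (shapes are hypothetical; the tolerance accounts for the float32 cast).
a = np.random.rand(4, 3)
b = np.random.rand(4, 3)
ans = skcuda_linalg(a, b)
ref = a.T.dot(b) / (np.linalg.norm(a) * np.linalg.norm(b))
print("matches numpy:", np.allclose(ans.get(), ref, atol=1e-4))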
def get_Ehrenfest(self, t):
    """
    Calculate the observables entering the Ehrenfest theorems at time t.
    :param t: current time
    :return: coordinate and momentum densities if the Ehrenfest theorems
        were calculated; otherwise None
    """
    if self.isEhrenfest:
        #########################################################################
        #
        #   Working in the coordinate representation
        #
        #########################################################################

        # Normalize the wave function as required in self.weighted_func_cuda_code
        self.wavefunction /= cu_linalg.norm(self.wavefunction)

        # save the current value of <X>
        self.X_average.append(self.get_average("X"))

        # save the current value of <-diff_V>
        self.P_average_RHS.append(-self.get_average(self.diff_V))

        # save the potential energy
        self.hamiltonian_average.append(self.get_average(self.V))

        #########################################################################
        #
        #   Working in the momentum representation
        #
        #########################################################################

        cufft.fft_Z2Z(self.wavefunction, self.wavefunction, self.plan_Z2Z_psi)

        # Normalize the wave function as required in self.weighted_func_cuda_code
        self.wavefunction /= cu_linalg.norm(self.wavefunction)

        # save the current value of <diff_K>
        self.X_average_RHS.append(self.get_average(self.diff_K))

        # save the current value of <P>
        self.P_average.append(self.get_average("P"))

        # add the expectation value of the kinetic energy
        self.hamiltonian_average[-1] += self.get_average(self.K)

        # go back to the coordinate representation
        cufft.ifft_Z2Z(self.wavefunction, self.wavefunction, self.plan_Z2Z_psi)
def fast_matmul(x, y, x_type, y_type):
    """Use pycuda/scikit-cuda to compute c = a * b, plus a few related products."""
    linalg.init()
    a_gpu = gpuarray.to_gpu(x.astype(x_type))
    a_t_gpu = gpuarray.to_gpu(x.T.copy().astype(x_type))
    b_gpu = gpuarray.to_gpu(y.astype(y_type))
    row_sum = 0

    t1_inside = time.time()
    c_gpu = linalg.dot(a_gpu, b_gpu)

    # accumulate the rows of a (i.e. the column sums of x)
    for a_i in a_gpu:
        row_sum += a_i

    # a few throwaway products exercising linalg.dot (note the chained reduce
    # is only dimensionally valid when the operands are square)
    gg = linalg.dot(a_gpu, b_gpu)
    gg = linalg.dot(a_i, a_i)
    gg = reduce(linalg.dot, (a_gpu, b_gpu, b_gpu, b_gpu))

    z_gpu = a_gpu.copy()
    tmp = a_t_gpu
    a_prod = linalg.dot(a_gpu, tmp)
    t2_inside = time.time()
    print('inside cost {:.4f}s'.format(t2_inside - t1_inside))

    a = np.random.randint(-5, 5, (3, 4)).astype(np.float32)
    a_gpu = gpuarray.to_gpu(a)
    norm_gpu = linalg.norm(a_gpu)
    # compare against numpy with a tolerance instead of exact float equality
    print('is norm right?', np.allclose(np.linalg.norm(a), norm_gpu))

    # column-normalize |a| by dividing each column by its (regularized) sum
    a_gpu = abs(a_gpu)
    column_sum = misc.sum(a_gpu, axis=0)
    column_sum = column_sum.reshape((1, -1))
    all_one_gpu = gpuarray.to_gpu(np.ones((3, 1), np.float32))
    div_mat_gpu = linalg.dot(all_one_gpu, column_sum)
    norm_1 = a_gpu / (div_mat_gpu + 1e-3)
    print(a_gpu)
    print(column_sum)
    print(column_sum.shape)
    print(norm_1)

    return c_gpu.get(), a_prod.get(), row_sum.get()
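# Hedged usage sketch; square shapes are used so that every intermediate
# product in fast_matmul (including the chained reduce) is dimensionally valid.
x = np.random.rand(32, 32)
y = np.random.rand(32, 32)
c, a_prod, row_sum = fast_matmul(x, y, np.float32, np.float32)
print("c == x @ y:", np.allclose(c, x.dot(y), atol=1e-3))
print("a_prod == x @ x.T:", np.allclose(a_prod, x.dot(x.T), atol=1e-3))
print("row_sum == x.sum(axis=0):", np.allclose(row_sum, x.sum(axis=0), atol=1e-3))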
def projectout_stationary_states(self, wavefunction):
    """
    Project out the stationary states from the supplied wavefunction.
    :param wavefunction: provided wavefunction
    :return: wavefunction
    """
    ##########################################################################################
    #
    #   Make sure that all the kernels are initialized
    #
    ##########################################################################################
    try:
        self.vdot
        self.projectout
    except AttributeError:
        wavefunction_type = dict(wave_type=dtype_to_ctype(self.wavefunction.dtype))

        # set the vdot function (scalar product with complex conjugation)
        self.vdot = ReductionKernel(
            self.wavefunction.dtype,
            neutral="0.",
            reduce_expr="a + b",
            map_expr="conj(bra[i]) * ket[i]",
            arguments="const {wave_type} *bra, const {wave_type} *ket".format(**wavefunction_type)
        )

        self.projectout = ElementwiseKernel(
            "{wave_type} *psi, const {wave_type} *phi, const {wave_type} C".format(**wavefunction_type),
            "psi[i] -= C * phi[i]"
        )

    # normalize
    wavefunction /= cu_linalg.norm(wavefunction)

    # calculate all projections
    projs = [self.vdot(psi, wavefunction).get() for psi in self.stationary_states]

    # project out the stationary states
    for psi, proj in zip(self.stationary_states, projs):
        self.projectout(wavefunction, psi, proj)

    # normalize
    wavefunction /= cu_linalg.norm(wavefunction)

    return wavefunction
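# The kernels above implement psi <- psi - sum_k <phi_k|psi> phi_k followed by
# renormalization, i.e. a Gram-Schmidt-style removal of the stationary states.
# A self-contained numpy sketch of the same projection with hypothetical states:
psi = np.random.rand(128) + 1j * np.random.rand(128)
psi /= np.linalg.norm(psi)
phi = np.random.rand(128) + 1j * np.random.rand(128)
phi /= np.linalg.norm(phi)                 # a (normalized) stationary state
psi -= np.vdot(phi, psi) * phi             # project it out
psi /= np.linalg.norm(psi)
print("overlap after projection:", abs(np.vdot(phi, psi)))   # ~0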
def get_energy(self):
    """
    Calculate the expectation value of the Hamiltonian.
    :return: <V> + <K>
    """
    # Normalize the wave function as required in self.weighted_func_cuda_code
    self.wavefunction /= cu_linalg.norm(self.wavefunction)

    av_V = self.get_average(self.V)

    # go to the momentum representation
    cufft.fft_Z2Z(self.wavefunction, self.wavefunction, self.plan_Z2Z_psi)

    # Normalize the wave function as required in self.weighted_func_cuda_code
    self.wavefunction /= cu_linalg.norm(self.wavefunction)

    av_K = self.get_average(self.K)

    # go back to the coordinate representation
    cufft.ifft_Z2Z(self.wavefunction, self.wavefunction, self.plan_Z2Z_psi)

    return av_V + av_K
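# The method above evaluates <V> on the coordinate grid and <K> on the momentum
# grid reached by FFT. A self-contained numpy sketch of the same pattern for a
# Gaussian wave packet with V(x) = 0.5 x^2 and K(p) = 0.5 p^2 (grids hypothetical):
x = np.linspace(-10., 10., 512, endpoint=False)
dx = x[1] - x[0]
p = 2 * np.pi * np.fft.fftfreq(x.size, d=dx)
psi = np.exp(-0.5 * x ** 2).astype(np.complex128)
psi /= np.linalg.norm(psi)
av_V = np.sum(0.5 * x ** 2 * np.abs(psi) ** 2)
psi_p = np.fft.fft(psi)
psi_p /= np.linalg.norm(psi_p)
av_K = np.sum(0.5 * p ** 2 * np.abs(psi_p) ** 2)
print("E =", av_V + av_K)   # ~0.5 for the harmonic-oscillator ground state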
def set_ground_state(self, nsteps=10000):
    """
    Obtain the ground state wave function by imaginary time propagation.
    :param nsteps: number of imaginary time steps to take
    :return: self
    """
    self.set_wavefunction(1.)

    for _ in range(nsteps):
        self.bloch_expV(self.wavefunction, **self.wavefunction_mapper_params)

        cufft.fft_Z2Z(self.wavefunction, self.wavefunction, self.plan_Z2Z_psi)
        self.bloch_expK(self.wavefunction, **self.wavefunction_mapper_params)
        cufft.ifft_Z2Z(self.wavefunction, self.wavefunction, self.plan_Z2Z_psi)

        self.bloch_expV(self.wavefunction, **self.wavefunction_mapper_params)

        self.wavefunction /= cu_linalg.norm(self.wavefunction) * np.sqrt(self.dX)

    return self
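# Normalization convention used above (and in the other snippets): dividing by
# norm(psi) * sqrt(dX) enforces sum(|psi|^2) * dX == 1, i.e. a unit-norm wave
# function on the grid. A quick numpy check with a hypothetical grid step:
psi = np.random.rand(256) + 1j * np.random.rand(256)
dX = 0.05
psi /= np.linalg.norm(psi) * np.sqrt(dX)
print("discretized norm:", np.sum(np.abs(psi) ** 2) * dX)   # ~1.0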
def propagate(self, steps=1):
    """
    Propagate the wave function saved in self.wavefunction in time.
    :param steps: number of self.dt time increments to make
    :return: self
    """
    for _ in range(steps):
        # increment current time
        self.t += self.dt

        # advance by one time step
        self.single_step_propagation()

        # calculate the Ehrenfest theorems
        self.get_Ehrenfest(self.t)

        # normalize
        self.wavefunction /= cu_linalg.norm(self.wavefunction) * np.sqrt(self.dX)

    return self
def logis(y, x):
    x = x.astype(np.float32)
    y = y.astype(np.float32)
    start = time.time()

    # transfer the variables to the GPU
    x_gpu = gpuarray.to_gpu(x)
    y_gpu = gpuarray.to_gpu(y)
    linalg.init()

    # transpose of X
    x_gpu_T = linalg.transpose(x_gpu)

    # ordinary least squares starting point: beta = (X^T X)^{-1} X^T y
    beta_gpu = linalg.dot(linalg.dot(linalg.inv(linalg.dot(x_gpu_T, x_gpu)), x_gpu_T), y_gpu)

    j = 1
    while True:
        # sapply (defined elsewhere) is assumed to evaluate mu = sigmoid(X beta)
        mu = sapply(x, beta_gpu.get())
        mu = mu.astype(np.float32)
        mu_gpu = gpuarray.to_gpu(mu)

        V_gpu = linalg.diag(mu_gpu)
        f2_gpu = linalg.multiply(mu_gpu, 1 - mu_gpu)
        f3_gpu = linalg.diag(1 / f2_gpu)
        f4_gpu = (y_gpu - mu_gpu)
        f5_gpu = linalg.dot(f3_gpu, f4_gpu)

        # nanValue (defined elsewhere) replaces NaNs with finite values
        if np.isnan(f5_gpu.get()).any():
            f5_cpu = f5_gpu.get()
            f5_cpu = nanValue(f5_cpu)
            f5_gpu = gpuarray.to_gpu(f5_cpu.astype(np.float32))

        # working response and weighted least-squares update
        y_1_gpu = linalg.dot(x_gpu, beta_gpu) + f5_gpu
        beta_1_gpu = linalg.dot(
            linalg.dot(
                linalg.dot(linalg.inv(linalg.dot(linalg.dot(x_gpu_T, V_gpu), x_gpu)), x_gpu_T),
                V_gpu
            ),
            y_1_gpu
        )

        check_value = linalg.norm(beta_1_gpu - beta_gpu)
        if j == 10 or check_value < 0.00001:
            break
        beta_gpu = beta_1_gpu
        j = j + 1

    end = time.time()
    tiempo = (end - start)
    return {"iteraciones": j, "Betas": beta_gpu.get(), "time": tiempo}
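# The loop above is iteratively reweighted least squares (IRLS) for logistic
# regression: beta <- (X^T V X)^{-1} X^T V z with working response
# z = X beta + V^{-1}(y - mu). A one-step numpy sketch, assuming (as the code
# seems to) that sapply returns the logistic mean mu = sigmoid(X beta):
X = np.random.rand(100, 3).astype(np.float32)
y = (np.random.rand(100) > 0.5).astype(np.float32)
beta = np.linalg.solve(X.T @ X, X.T @ y)            # OLS start, as above
mu = 1.0 / (1.0 + np.exp(-X @ beta))                # logistic mean
V = np.diag(mu * (1.0 - mu))
z = X @ beta + (y - mu) / (mu * (1.0 - mu))
beta = np.linalg.solve(X.T @ V @ X, X.T @ V @ z)    # one IRLS update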
def set_wavefunction(self, new_wave_func):
    """
    Set the initial wave function.
    :param new_wave_func: 1D numpy array or 1D GPU array containing the wave
        function, a string of C code specifying the initial condition (the
        coordinate X is declared), a python function of the form F(self, X),
        or a number
    :return: self
    """
    if isinstance(new_wave_func, (np.ndarray, gpuarray.GPUArray)):
        # perform the consistency checks
        assert new_wave_func.shape == self.wavefunction.shape, \
            "The grid size does not match that of the wave function"

        # copy the wave function
        self.wavefunction[:] = new_wave_func.astype(np.complex128)

    elif isinstance(new_wave_func, FunctionType):
        # user supplied a function that returns the wave function
        self.wavefunction[:] = new_wave_func(self, self.X)

    elif isinstance(new_wave_func, str):
        # user specified C code
        print("\n================================ Compiling init_wavefunc ================================\n")
        init_wavefunc = SourceModule(
            self.init_wavefunction_cuda_source.format(
                cuda_consts=self.cuda_consts, new_wave_func=new_wave_func
            ),
        ).get_function("Kernel")
        init_wavefunc(self.wavefunction, **self.wavefunction_mapper_params)

    elif isinstance(new_wave_func, (float, complex)):
        # user specified a constant
        self.wavefunction.fill(np.complex128(new_wave_func))

    else:
        raise NotImplementedError(
            "new_wave_func must be a numpy/GPU array, a function, a C-code string, or a number"
        )

    # normalize
    self.wavefunction /= cu_linalg.norm(self.wavefunction) * np.sqrt(self.dX)

    return self
def update_W_hat_skcuda(W_hat, X_hat, A_t, B_t, x_sum, alpha_sum, eps, t):
    # x_sum, alpha_sum and t are unused here but kept in the signature
    n_hat, k_cluster = W_hat.shape
    W_hat_new = W_hat.copy()
    linalg.init()

    if not isinstance(W_hat_new, gpuarray.GPUArray):
        W_hat_new_gpu = gpuarray.to_gpu(W_hat_new.astype(np.float64))
    else:
        W_hat_new_gpu = W_hat_new

    if not isinstance(X_hat, gpuarray.GPUArray):
        tmp_x = np.ascontiguousarray(X_hat)
        X_hat_gpu = gpuarray.to_gpu(tmp_x.astype(np.float64))
    else:
        X_hat_gpu = X_hat
    X_hat_T_gpu = linalg.transpose(X_hat_gpu)

    if not isinstance(A_t, gpuarray.GPUArray):
        A_t_gpu = gpuarray.to_gpu(A_t.astype(np.float64))
    else:
        A_t_gpu = A_t
    A_t_gpu_trans = linalg.transpose(A_t_gpu)

    if not isinstance(B_t, gpuarray.GPUArray):
        B_t_gpu = gpuarray.to_gpu(B_t.astype(np.float64))
    else:
        B_t_gpu = B_t
    B_t_gpu_trans = linalg.transpose(B_t_gpu)

    k = 0
    while True:
        k += 1
        W_hat_old_gpu = W_hat_new_gpu.copy()

        # column-wise projected gradient steps
        for j in range(k_cluster):
            T1 = linalg.dot(X_hat_T_gpu, B_t_gpu_trans[j, :].reshape((-1, 1)))
            X_product_gpu = linalg.dot(X_hat_T_gpu, X_hat_gpu)
            T2 = reduce(linalg.dot,
                        (X_product_gpu, W_hat_new_gpu, A_t_gpu_trans[j, :].reshape(-1, 1)))
            grad_gpu = -T1 + T2

            step_size = 1 / (linalg.norm(X_product_gpu) * linalg.norm(A_t_gpu_trans[j, :]) + 1e-8)
            tmp = -step_size * grad_gpu.reshape((-1)) + W_hat_new_gpu[:, j].copy()

            # geo_projection_to_cvx_cmb (defined elsewhere) projects the column
            # onto the simplex of convex combinations
            u_j = geo_projection_to_cvx_cmb(tmp.get())
            normalized_u_j_gpu = gpuarray.to_gpu(u_j.astype(np.float64))
            W_hat_new_gpu[:, j] = normalized_u_j_gpu

        # stop when the sweep stalls, or after a hard cap of 10000 sweeps
        if (linalg.norm(W_hat_new_gpu - W_hat_old_gpu) < eps) or k >= 10000:
            break

    return W_hat_new_gpu
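# Each column update above is a projected gradient step with gradient
# grad_j = X^T X W a_j - X^T b_j and step size 1/(||X^T X|| ||a_j||), followed
# by a projection onto the simplex. A numpy sketch of one such step with
# hypothetical shapes and a simple clip-and-renormalize stand-in for the
# geo_projection_to_cvx_cmb projection:
Xh = np.random.rand(20, 30)
W = np.random.rand(30, 5)
W /= W.sum(axis=0)                       # columns start on the simplex
A = np.random.rand(5, 5)
B = np.random.rand(20, 5)
j = 0
G = Xh.T @ Xh
grad = G @ W @ A[:, j] - Xh.T @ B[:, j]
step = 1.0 / (np.linalg.norm(G) * np.linalg.norm(A[:, j]) + 1e-8)
u = np.clip(W[:, j] - step * grad, 0, None)
W[:, j] = u / max(u.sum(), 1e-8)         # stand-in simplex projection (assumption)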