def precondition(self, n):
    """ Rescale the columns of the linear system matrix.

    Each column is extracted with the `column` kernel and its Euclidean
    norm is measured. Columns whose norm is not below 1.0 are multiplied by
    the inverse of that norm through the `prod_c_column` kernel; the other
    columns keep a factor of 1. The solution of the rescaled system must be
    corrected afterwards with the same factors.
    @param n Linear system dimension.
    @note self.v is overwritten, it is used as scratch storage for the
    column being measured.
    @note The factors are finally uploaded with self.x.set().
    """
    global_size = (clUtils.globalSize(n),)
    factors = np.ones((n), dtype=np.float32)
    for index in range(n):
        column = np.uint32(index)
        # v has not been used by the solver yet, so it can hold the column
        self.program.column(self.queue, global_size, None,
                            self.A, self.v.data, column, n)
        norm = np.sqrt(self.dot(self.v, self.v).get())
        FreeCAD.Console.PrintMessage("col ")
        FreeCAD.Console.PrintMessage(index)
        FreeCAD.Console.PrintMessage(", norm=")
        FreeCAD.Console.PrintMessage(norm)
        FreeCAD.Console.PrintMessage("\n")
        # Small columns are left as they are
        if norm < 1.0:
            continue
        scale = np.float32(1.0 / norm)
        factors[index] = scale
        self.program.prod_c_column(self.queue, global_size, None,
                                   self.A, scale, column, n)
    # NOTE(review): the scaling factors are written into self.x (the
    # solution vector); confirm that a dedicated factors vector was not
    # the intended destination.
    self.x.set(factors)
def solve(self, A, B, x0=None, tol=10e-6, iters=300):
    r""" Solve linear system of equations by an iterative method.

    Every step computes the residual with the `r` kernel, multiplies it by
    the matrix (`dot_mat_vec` kernel) and hands both vectors, together with
    the scalars (AR.R) and (AR.AR), to the `minres` kernel that writes the
    next approximation.
    @param A Linear system matrix.
    @param B Linear system independent term.
    @param x0 Initial aproximation of the solution.
    @param tol Relative error tolerance, computed from dot products: \n
    \$ \vert\vert B - A \, x \vert \vert_2 / \vert\vert B \vert \vert_2 \$
    @param iters Maximum number of iterations. It must be at least 1,
    otherwise the values used to build the result are never assigned.
    @return Tuple with the solution (float32 numpy array), the relative
    residual evaluated in the last loop pass, and the index of that pass.
    """
    # Upload the problem data
    self.setBuffers(A, B, x0)
    # OpenCL launch dimensions
    n = np.uint32(len(B))
    global_size = (clUtils.globalSize(n), )
    # Squared norm of the independent term, reference for the tolerance
    B_cl = cl_array.to_device(self.context, self.queue, B)
    bnorm2 = self.dot(B_cl, B_cl).get()
    FreeCAD.Console.PrintMessage(bnorm2)
    FreeCAD.Console.PrintMessage("\n")
    for i in range(iters):
        # Residual of the current approximation X0, stored in R
        self.program.r(self.queue, global_size, None,
                       self.A, self.B, self.X0, self.R.data, n)
        rnorm2 = self.dot(self.R, self.R).get()
        FreeCAD.Console.PrintMessage("\t")
        FreeCAD.Console.PrintMessage(rnorm2)
        FreeCAD.Console.PrintMessage("\n")
        # Stop as soon as the relative residual is small enough
        if np.sqrt(rnorm2 / bnorm2) <= tol:
            break
        # AR = A * R and the two scalars required by the update
        self.program.dot_mat_vec(self.queue, global_size, None,
                                 self.A, self.R.data, self.AR.data, n)
        AR_R = self.dot(self.AR, self.R).get()
        AR_AR = self.dot(self.AR, self.AR).get()
        self.program.minres(self.queue, global_size, None,
                            self.A, self.R.data, self.X, self.X0,
                            AR_R, AR_AR, n)
        # The buffer just written becomes the current approximation
        self.X, self.X0 = self.X0, self.X
    # Download the last approximation
    x = np.zeros((n), dtype=np.float32)
    cl.enqueue_read_buffer(self.queue, self.X0, x).wait()
    return (x, np.sqrt(rnorm2 / bnorm2), i)
def rnorm2(self, X):
    """ Compute the squared norm of the residuals.

    The `r` kernel is launched with the matrix, the independent term and
    the given vector, writing its output into self.R; the returned value is
    the dot product of self.R with itself.
    @param X Result of the last iteration (pyopencl.array object).
    @return Norm square of the residuals.
    """
    n = np.uint32(self.n)
    global_size = (clUtils.globalSize(n),)
    # Fill self.R with the residual associated to X
    self.program.r(self.queue, global_size, None,
                   self.A, self.B.data, X.data, self.R.data, n)
    return cl_array.dot(self.R, self.R).get()
def solve(self, A, B, x0=None, tol=10e-6, iters=300, w=1.0):
    r""" Solve linear system of equations by a Jacobi iterative method.

    Each loop pass evaluates the residual, checks the tolerance and then
    launches the `jacobi` kernel twice (X0 -> X and X -> X0), so the
    current approximation is always left in X0.
    @param A Linear system matrix.
    @param B Linear system independent term.
    @param x0 Initial aproximation of the solution.
    @param tol Relative error tolerance, computed from dot products: \n
    \$ \vert\vert B - A \, x \vert \vert_2 / \vert\vert B \vert \vert_2 \$
    @param iters Maximum number of iterations. It must be at least 1,
    otherwise the loop index returned is never assigned.
    @param w Relaxation factor. It is divided by the squared norm of B
    before being passed to the kernel.
    @return Tuple with a copy of the solution, the relative residual
    evaluated in the last loop pass, and the index of that pass.
    """
    w = np.float32(w)
    # Upload the problem data
    self.setBuffers(A, B, x0)
    # OpenCL launch dimensions
    n = np.uint32(len(B))
    global_size = (clUtils.globalSize(n), )
    # Squared norm of the independent term, reference for the tolerance
    B_cl = cl_array.to_device(self.context, self.queue, B)
    bnorm2 = self.dot(B_cl, B_cl).get()
    w = w / bnorm2
    FreeCAD.Console.PrintMessage(bnorm2)
    FreeCAD.Console.PrintMessage("\n")
    rnorm2 = 0.
    for i in range(iters):
        # The residual is written into X, which is free at this point,
        # and taken through the host vector to compute its squared norm
        self.program.r(self.queue, global_size, None,
                       self.A, self.B, self.X0, self.X, n)
        cl.enqueue_read_buffer(self.queue, self.X, self.x).wait()
        x_cl = cl_array.to_device(self.context, self.queue, self.x)
        rnorm2 = self.dot(x_cl, x_cl).get()
        FreeCAD.Console.PrintMessage("\t")
        FreeCAD.Console.PrintMessage(rnorm2)
        FreeCAD.Console.PrintMessage("\n")
        if np.sqrt(rnorm2 / bnorm2) <= tol:
            break
        # Two consecutive sweeps, ping-ponging between both buffers
        self.program.jacobi(self.queue, global_size, None,
                            self.A, self.B, self.X0, self.X, w, n)
        self.program.jacobi(self.queue, global_size, None,
                            self.A, self.B, self.X, self.X0, w, n)
    # Download the last approximation
    cl.enqueue_read_buffer(self.queue, self.X0, self.x).wait()
    return (np.copy(self.x), np.sqrt(rnorm2 / bnorm2), i)
def solve(self, A, B, x0=None, tol=10e-6, iters=300):
    r""" Solve linear system of equations by an iterative method.

    Every step computes the residual with the `r` kernel, multiplies it by
    the matrix (`dot_mat_vec` kernel) and hands both vectors, together with
    the scalars (AR.R) and (AR.AR), to the `minres` kernel that writes the
    next approximation.
    @param A Linear system matrix.
    @param B Linear system independent term.
    @param x0 Initial aproximation of the solution.
    @param tol Relative error tolerance, computed from dot products: \n
    \$ \vert\vert B - A \, x \vert \vert_2 / \vert\vert B \vert \vert_2 \$
    @param iters Maximum number of iterations. It must be at least 1,
    otherwise the values used to build the result are never assigned.
    @return Tuple with the solution (float32 numpy array), the relative
    residual evaluated in the last loop pass, and the index of that pass.
    """
    # Upload the problem data
    self.setBuffers(A, B, x0)
    # OpenCL launch dimensions
    n = np.uint32(len(B))
    global_size = (clUtils.globalSize(n),)
    # Squared norm of the independent term, reference for the tolerance
    B_cl = cl_array.to_device(self.context, self.queue, B)
    bnorm2 = self.dot(B_cl, B_cl).get()
    FreeCAD.Console.PrintMessage(bnorm2)
    FreeCAD.Console.PrintMessage("\n")
    for i in range(iters):
        # Residual of the current approximation X0, stored in R
        self.program.r(self.queue, global_size, None,
                       self.A, self.B, self.X0, self.R.data, n)
        rnorm2 = self.dot(self.R, self.R).get()
        FreeCAD.Console.PrintMessage("\t")
        FreeCAD.Console.PrintMessage(rnorm2)
        FreeCAD.Console.PrintMessage("\n")
        # Stop as soon as the relative residual is small enough
        if np.sqrt(rnorm2 / bnorm2) <= tol:
            break
        # AR = A * R and the two scalars required by the update
        self.program.dot_mat_vec(self.queue, global_size, None,
                                 self.A, self.R.data, self.AR.data, n)
        AR_R = self.dot(self.AR, self.R).get()
        AR_AR = self.dot(self.AR, self.AR).get()
        self.program.minres(self.queue, global_size, None,
                            self.A, self.R.data, self.X, self.X0,
                            AR_R, AR_AR, n)
        # The buffer just written becomes the current approximation
        self.X, self.X0 = self.X0, self.X
    # Download the last approximation
    x = np.zeros((n), dtype=np.float32)
    cl.enqueue_read_buffer(self.queue, self.X0, x).wait()
    return (x, np.sqrt(rnorm2 / bnorm2), i)
def execute(self, fs, waves, sea, bem, body, t):
    """ Compute system matrix.

    The `matrixGen` kernel is launched with one work item range covering
    the free surface plus body element count, and afterwards the device
    matrix and independent term are read back into bem['A'] and bem['B'].
    @param fs Free surface instance.
    @param waves Waves instance.
    @param sea Sea boundary instance.
    @param bem Boundary Element Method instance.
    @param body Body instance.
    @param t Simulation time.
    """
    # Upload the problem data
    self.setBuffers(fs, waves, sea, bem, body)
    # Scalars with the types sent to the kernel
    length = np.float32(fs['L'])
    beam = np.float32(fs['B'])
    dx = np.float32(fs['dx'])
    dy = np.float32(fs['dy'])
    time = np.float32(t)
    # Element counts
    nx = np.uint32(fs['Nx'])
    ny = np.uint32(fs['Ny'])
    n_fs = np.uint32(fs['N'])
    n_body = np.uint32(body['N'])
    n = np.uint32(n_fs + n_body)
    n_sea_x = np.int32(sea['Nx'])
    n_sea_y = np.int32(sea['Ny'])
    n_waves = np.uint32(waves['N'])
    # Launch the kernel and wait for it
    global_size = (clUtils.globalSize(n),)
    self.program.matrixGen(self.queue, global_size, None,
                           self.A, self.B,
                           self.pos, self.area, self.normal,
                           self.bem_p, self.bem_dp,
                           self.waves,
                           length, beam, dx, dy, time,
                           nx, ny, n_fs, n_body, n,
                           n_sea_x, n_sea_y, n_waves)
    self.queue.finish()
    # Read output data (the matrix is read through a flat view of its
    # host array)
    events = [
        cl.enqueue_read_buffer(self.queue, self.A,
                               bem['A'].reshape((n*n))),
        cl.enqueue_read_buffer(self.queue, self.B, bem['B']),
        # cl.enqueue_read_buffer(self.queue, self.dB, bem['dB']),
    ]
    for event in events:
        event.wait()
    # --------------------------------------------------------
    # Debugging (disabled)
    # --------------------------------------------------------
    # for i in range(0,fs['Nx']):
    #     for j in range(0,fs['Ny']):
    #         x = fs['pos'][i,j,0]
    #         y = fs['pos'][i,j,1]
    #         FreeCAD.Console.PrintMessage("pos = {0},{1}\n".format(x,y))
    #         A = np.dot(bem['A'][i*fs['Ny'] + j,:], bem['gradp'][:])
    #         B = bem['B'][i*fs['Ny'] + j]
    #         phi = 2.0 * (B - A)
    #         bem['p'][i*fs['Ny'] + j] = phi
    # --------------------------------------------------------
    # Debugging (disabled)
    # --------------------------------------------------------
    return
def solve(self, A, b, x0=None, tol=10e-5, iters=300):
    r""" Solve linear system of equations by an iterative method.

    @param A Linear system matrix.
    @param b Linear system independent term.
    @param x0 Initial aproximation of the solution.
    @param tol Relative error tolerance, computed from dot products: \n
    \$ \vert\vert b - A \, x \vert \vert_2 / \vert\vert b \vert \vert_2 \$
    @param iters Maximum number of iterations. It must be at least 1,
    otherwise the values used to build the result are never assigned.
    @return Tuple with the solution (float32 numpy array), the relative
    residual evaluated at the top of the last loop pass, and the index of
    that pass plus one.
    @note The recurrences used (alpha, beta, rhobar, phibar and one plane
    rotation per step) are the ones of the LSQR algorithm.
    """
    # Upload the problem data
    self.setBuffers(A, b, x0)
    # OpenCL launch dimensions
    n = np.uint32(len(b))
    global_size = (clUtils.globalSize(n), )
    # Norm of the independent term, reference for the tolerance
    bnorm = np.sqrt(cl_array.dot(self.b, self.b).get())
    # Starting values: beta and alpha are the norms used to scale u and v
    beta = bnorm
    self.dot_c_vec(1.0 / beta, self.u)
    self.program.dot_matT_vec(self.queue, global_size, None,
                              self.A, self.u.data, self.v.data, n)
    alpha = np.sqrt(cl_array.dot(self.v, self.v).get())
    self.dot_c_vec(1.0 / alpha, self.v)
    self.copy_vec(self.w, self.v)
    phibar = beta
    rhobar = alpha
    for i in range(iters):
        # Residual of the current approximation
        self.program.r(self.queue, global_size, None,
                       self.A, self.b.data, self.x.data, self.r.data, n)
        rnorm = np.sqrt(cl_array.dot(self.r, self.r).get())
        # Stop as soon as the relative residual is small enough
        if rnorm / bnorm <= tol:
            break
        # Next u, with beta as its norm before scaling
        self.program.u(self.queue, global_size, None,
                       self.A, self.u.data, self.v.data, self.u.data,
                       alpha, n)
        beta = np.sqrt(cl_array.dot(self.u, self.u).get())
        if not beta:
            # A null vector can not be scaled, nothing else can be done
            break
        self.dot_c_vec(1.0 / beta, self.u)
        # Next v, with alpha as its norm before scaling
        self.program.v(self.queue, global_size, None,
                       self.A, self.u.data, self.v.data, self.v.data,
                       beta, n)
        alpha = np.sqrt(cl_array.dot(self.v, self.v).get())
        if not alpha:
            break
        self.dot_c_vec(1.0 / alpha, self.v)
        # Plane rotation
        c, s, rho = self.symOrtho(rhobar, beta)
        theta = s * alpha
        rhobar = -c * alpha
        phi = c * phibar
        phibar = s * phibar
        # x <- x + (phi / rho) w, and then w <- v - (theta / rho) w
        self.linear_comb(self.x, 1.0, self.x, phi / rho, self.w)
        self.linear_comb(self.w, 1.0, self.v, -theta / rho, self.w)
    # Download the last approximation
    x = np.zeros((n), dtype=np.float32)
    cl.enqueue_read_buffer(self.queue, self.x.data, x).wait()
    return (x, rnorm / bnorm, i + 1)
def solve(self, A, b, x0=None, tol=10e-5, iters=300):
    r""" Solve linear system of equations by an iterative method.

    @param A Linear system matrix.
    @param b Linear system independent term.
    @param x0 Initial aproximation of the solution.
    @param tol Relative error tolerance, computed from dot products: \n
    \$ \vert\vert b - A \, x \vert \vert_2 / \vert\vert b \vert \vert_2 \$
    @param iters Maximum number of iterations. It must be at least 1,
    otherwise the values used to build the result are never assigned.
    @return Tuple with the solution (float32 numpy array), the relative
    residual evaluated at the top of the last loop pass, and the index of
    that pass plus one.
    @note The recurrences used (alpha, beta, rhobar, phibar and one plane
    rotation per step) are the ones of the LSQR algorithm.
    """
    # Upload the problem data
    self.setBuffers(A, b, x0)
    # OpenCL launch dimensions
    n = np.uint32(len(b))
    global_size = (clUtils.globalSize(n),)
    # Norm of the independent term, reference for the tolerance
    bnorm = np.sqrt(cl_array.dot(self.b, self.b).get())
    # Starting values: beta and alpha are the norms used to scale u and v
    beta = bnorm
    self.dot_c_vec(1.0 / beta, self.u)
    self.program.dot_matT_vec(self.queue, global_size, None,
                              self.A, self.u.data, self.v.data, n)
    alpha = np.sqrt(cl_array.dot(self.v, self.v).get())
    self.dot_c_vec(1.0 / alpha, self.v)
    self.copy_vec(self.w, self.v)
    phibar = beta
    rhobar = alpha
    for i in range(iters):
        # Residual of the current approximation
        self.program.r(self.queue, global_size, None,
                       self.A, self.b.data, self.x.data, self.r.data, n)
        rnorm = np.sqrt(cl_array.dot(self.r, self.r).get())
        # Stop as soon as the relative residual is small enough
        if rnorm / bnorm <= tol:
            break
        # Next u, with beta as its norm before scaling
        self.program.u(self.queue, global_size, None,
                       self.A, self.u.data, self.v.data, self.u.data,
                       alpha, n)
        beta = np.sqrt(cl_array.dot(self.u, self.u).get())
        if not beta:
            # A null vector can not be scaled, nothing else can be done
            break
        self.dot_c_vec(1.0 / beta, self.u)
        # Next v, with alpha as its norm before scaling
        self.program.v(self.queue, global_size, None,
                       self.A, self.u.data, self.v.data, self.v.data,
                       beta, n)
        alpha = np.sqrt(cl_array.dot(self.v, self.v).get())
        if not alpha:
            break
        self.dot_c_vec(1.0 / alpha, self.v)
        # Plane rotation
        c, s, rho = self.symOrtho(rhobar, beta)
        theta = s * alpha
        rhobar = -c * alpha
        phi = c * phibar
        phibar = s * phibar
        # x <- x + (phi / rho) w, and then w <- v - (theta / rho) w
        self.linear_comb(self.x, 1.0, self.x, phi / rho, self.w)
        self.linear_comb(self.w, 1.0, self.v, -theta / rho, self.w)
    # Download the last approximation
    x = np.zeros((n), dtype=np.float32)
    cl.enqueue_read_buffer(self.queue, self.x.data, x).wait()
    return (x, rnorm / bnorm, i + 1)
def solve(self, A, B, x0=None, tol=10e-6, iters=300, w=1.0):
    r""" Solve linear system of equations by a Jacobi iterative method.

    Each loop pass performs a sweep X0 -> X and measures the residual of
    the new vector. If that residual grew, the sweep is discarded and the
    relaxation factor is reduced by the ratio between both residuals;
    otherwise a second sweep X -> X0 is carried out.
    @param A Linear system matrix.
    @param B Linear system independent term.
    @param x0 Initial aproximation of the solution.
    @param tol Relative error tolerance, computed from dot products: \n
    \$ \vert\vert B - A \, x \vert \vert_2 / \vert\vert B \vert \vert_2 \$
    @param iters Maximum number of iterations. It must be at least 1,
    otherwise the loop index returned is never assigned.
    @param w Relaxation factor (could be autoupdated if the method
    diverges)
    @return Tuple with a copy of the solution, the relative residual
    evaluated at the top of the last loop pass, and the index of that
    pass.
    """
    w = np.float32(w)
    # Upload the problem data
    self.setBuffers(A, B, x0)
    # OpenCL launch dimensions
    n = np.uint32(len(B))
    global_size = (clUtils.globalSize(n),)
    # Squared norm of the independent term, reference for the tolerance
    bnorm2 = cl_array.dot(self.B, self.B).get()
    FreeCAD.Console.PrintMessage(bnorm2)
    FreeCAD.Console.PrintMessage("\n")
    rnorm2 = 0.
    for i in range(iters):
        rnorm2 = self.rnorm2(self.X0)
        FreeCAD.Console.PrintMessage("\t")
        FreeCAD.Console.PrintMessage(rnorm2)
        FreeCAD.Console.PrintMessage(" -> ")
        FreeCAD.Console.PrintMessage(rnorm2 / bnorm2)
        FreeCAD.Console.PrintMessage("\n")
        # Stop as soon as the relative residual is small enough
        if np.sqrt(rnorm2 / bnorm2) <= tol:
            break
        # First sweep, X0 -> X
        self.program.jacobi(self.queue, global_size, None,
                            self.A, self.B.data, self.X0.data,
                            self.X.data, w, n)
        # A growing residual means that the method is diverging
        candidate_rnorm2 = self.rnorm2(self.X)
        if candidate_rnorm2 > rnorm2:
            FreeCAD.Console.PrintMessage("\t\tDivergence found...\n\t\tw = ")
            w = w * rnorm2 / candidate_rnorm2
            FreeCAD.Console.PrintMessage(w)
            FreeCAD.Console.PrintMessage("\n")
            # X0 is untouched, so the failed sweep is simply dropped
            continue
        # Second sweep, X -> X0
        self.program.jacobi(self.queue, global_size, None,
                            self.A, self.B.data, self.X.data,
                            self.X0.data, w, n)
    # Download the last approximation
    cl.enqueue_read_buffer(self.queue, self.X0.data, self.x).wait()
    return (np.copy(self.x), np.sqrt(rnorm2 / bnorm2), i)
def solve(self, A, B, x0=None, tol=10e-6, iters=300):
    r""" Solve linear system of equations by an iterative method, after
    rescaling the columns of the matrix.

    The recurrences used (alpha, beta, rhobar, phibar and one plane
    rotation per step) are the ones of the LSQR algorithm. The solution of
    the rescaled system is multiplied by self.xf before being returned.
    @param A Linear system matrix.
    @param B Linear system independent term.
    @param x0 Initial aproximation of the solution.
    @param tol Relative error tolerance, computed from dot products: \n
    \$ \vert\vert B - A \, x \vert \vert_2 / \vert\vert B \vert \vert_2 \$
    @param iters Maximum number of iterations. It must be at least 1,
    otherwise the values used to build the result are never assigned.
    @return Tuple with the solution (float32 numpy array), the relative
    residual evaluated at the top of the last loop pass, and the index of
    that pass.
    """
    # Create/set OpenCL buffers
    self.setBuffers(A, B, x0)
    # Get dimensions for OpenCL execution
    n = np.uint32(len(B))
    gSize = (clUtils.globalSize(n),)
    # Preconditionate matrix
    self.precondition(n)
    # Get a norm to can compare later for valid result
    bnorm = np.sqrt(self.dot(self.b, self.b).get())
    FreeCAD.Console.PrintMessage(bnorm)
    FreeCAD.Console.PrintMessage("\n")
    # Initialize the problem
    beta = bnorm
    self.dot_c_vec(1.0 / beta, self.u)
    kernelargs = (self.A, self.u.data, self.v.data, n)
    self.program.dot_matT_vec(self.queue, gSize, None, *(kernelargs))
    alpha = np.sqrt(self.dot(self.v, self.v).get())
    self.dot_c_vec(1.0 / alpha, self.v)
    self.copy_vec(self.w, self.v)
    rhobar = alpha
    phibar = beta
    # Iterate while the result converges or maximum number
    # of iterations is reached.
    for i in range(0, iters):
        # Compute residues
        kernelargs = (self.A, self.b.data, self.x.data, self.r.data, n)
        self.program.r(self.queue, gSize, None, *(kernelargs))
        rnorm = np.sqrt(self.dot(self.r, self.r).get())
        FreeCAD.Console.PrintMessage("\t")
        FreeCAD.Console.PrintMessage(rnorm)
        FreeCAD.Console.PrintMessage("\n")
        # Test if the final result has been reached
        if rnorm / bnorm <= tol:
            break
        # Compute next alpha, beta, u, v
        kernelargs = (self.A, self.u.data, self.v.data, self.u.data,
                      alpha, n)
        self.program.u(self.queue, gSize, None, *(kernelargs))
        beta = np.sqrt(self.dot(self.u, self.u).get())
        FreeCAD.Console.PrintMessage("\t beta=")
        FreeCAD.Console.PrintMessage(beta)
        FreeCAD.Console.PrintMessage("\n")
        if not beta:
            # A null u can not be normalized: dividing by zero would fill
            # the solution with inf/nan values
            break
        self.dot_c_vec(1.0 / beta, self.u)
        kernelargs = (self.A, self.u.data, self.v.data, self.v.data,
                      beta, n)
        self.program.v(self.queue, gSize, None, *(kernelargs))
        alpha = np.sqrt(self.dot(self.v, self.v).get())
        FreeCAD.Console.PrintMessage("\t alpha=")
        FreeCAD.Console.PrintMessage(alpha)
        FreeCAD.Console.PrintMessage("\n")
        if not alpha:
            # Same breakdown situation, now for v
            break
        self.dot_c_vec(1.0 / alpha, self.v)
        # Apply the orthogonal transformation. The rotation is defined by
        # c = rhobar / rho and s = beta / rho, such that c*c + s*s = 1
        # (rho can not be null here because beta is not)
        rho = np.sqrt(rhobar * rhobar + beta * beta)
        c = rhobar / rho
        s = beta / rho
        theta = s * alpha
        rhobar = -c * alpha
        phi = c * phibar
        phibar = s * phibar
        # Update x and w:
        # x <- x + (phi / rho) w, and then w <- v - (theta / rho) w
        self.linear_comb(self.x, 1.0, self.x, phi / rho, self.w)
        self.linear_comb(self.w, 1.0, self.v, -theta / rho, self.w)
    # Correct returned result due to the preconditioning
    self.prod(self.x, self.xf, self.x)
    # Return result computed
    x = np.zeros((n), dtype=np.float32)
    cl.enqueue_read_buffer(self.queue, self.x.data, x).wait()
    return (x, rnorm / bnorm, i)
def solve(self, A, B, x0=None, tol=10e-6, iters=300, w=1.0):
    r""" Solve linear system of equations by a Jacobi iterative method.

    Each loop pass evaluates the residual, checks the tolerance and then
    launches the `jacobi` kernel twice (X0 -> X and X -> X0), so the
    current approximation is always left in X0.
    @param A Linear system matrix.
    @param B Linear system independent term.
    @param x0 Initial aproximation of the solution.
    @param tol Relative error tolerance, computed from dot products: \n
    \$ \vert\vert B - A \, x \vert \vert_2 / \vert\vert B \vert \vert_2 \$
    @param iters Maximum number of iterations. It must be at least 1,
    otherwise the loop index returned is never assigned.
    @param w Relaxation factor. It is divided by the squared norm of B
    before being passed to the kernel.
    @return Tuple with a copy of the solution, the relative residual
    evaluated in the last loop pass, and the index of that pass.
    """
    w = np.float32(w)
    # Upload the problem data
    self.setBuffers(A, B, x0)
    # OpenCL launch dimensions
    n = np.uint32(len(B))
    global_size = (clUtils.globalSize(n),)
    # Squared norm of the independent term, reference for the tolerance
    B_cl = cl_array.to_device(self.context, self.queue, B)
    bnorm2 = self.dot(B_cl, B_cl).get()
    w = w / bnorm2
    FreeCAD.Console.PrintMessage(bnorm2)
    FreeCAD.Console.PrintMessage("\n")
    rnorm2 = 0.
    for i in range(iters):
        # The residual is written into X, which is free at this point,
        # and taken through the host vector to compute its squared norm
        self.program.r(self.queue, global_size, None,
                       self.A, self.B, self.X0, self.X, n)
        cl.enqueue_read_buffer(self.queue, self.X, self.x).wait()
        x_cl = cl_array.to_device(self.context, self.queue, self.x)
        rnorm2 = self.dot(x_cl, x_cl).get()
        FreeCAD.Console.PrintMessage("\t")
        FreeCAD.Console.PrintMessage(rnorm2)
        FreeCAD.Console.PrintMessage("\n")
        if np.sqrt(rnorm2 / bnorm2) <= tol:
            break
        # Two consecutive sweeps, ping-ponging between both buffers
        self.program.jacobi(self.queue, global_size, None,
                            self.A, self.B, self.X0, self.X, w, n)
        self.program.jacobi(self.queue, global_size, None,
                            self.A, self.B, self.X, self.X0, w, n)
    # Download the last approximation
    cl.enqueue_read_buffer(self.queue, self.X0, self.x).wait()
    return (np.copy(self.x), np.sqrt(rnorm2 / bnorm2), i)