def _solve_cuda(lap_sparse, B, return_full_prob=False, maxiter=100, tol=5e-5):
    """
    Solve ``lap_sparse * x = -B[i]`` for every entry of B with PyViennaCL.

    Parameters
    ----------
    lap_sparse : sparse matrix
        Must provide ``get_shape()`` and ``tocoo()``; its non-zero entries
        are copied one by one into a ``p.CompressedMatrix``.
    B : sequence
        Each item must provide ``todense()``; the negated, flattened item
        is used as one right-hand side.
    return_full_prob : bool
        When False (the default) the per-item solutions are stacked and
        the index of the largest one is returned for every position.
    maxiter, tol :
        Accepted for signature compatibility; not used by this solver.

    Returns
    -------
    The list of solutions returned by ``p.solve`` when ``return_full_prob``
    is true, otherwise the ``np.argmax`` over that list along axis 0.
    """
    shape = lap_sparse.get_shape()
    coo = lap_sparse.tocoo()

    # Copy the non-zero entries into the PyViennaCL sparse matrix, converting
    # the indices and values to plain Python int / float on the way.
    lap_p = p.CompressedMatrix(shape[0], shape[1])
    for row, col, val in zip(coo.row, coo.col, coo.data):
        lap_p[int(row), int(col)] = float(val)

    X = []
    for b_item in B:
        # Negate, densify and flatten to a 1-D float64 array.
        rhs = np.asarray(-b_item.todense()).reshape(-1).astype('float64')
        X.append(p.solve(lap_p, p.Vector(rhs), p.upper_tag()))

    if return_full_prob:
        return X
    return np.argmax(np.array(X), axis=0)
def read_vector(fname, dtype):
    """
    Read a vector from a text file into a ``p.Vector``.

    The first line of the file holds the expected element count as its
    first space-separated field; the second line holds the elements,
    separated by single spaces.

    Parameters
    ----------
    fname : path of the file to read.
    dtype : element type; passed to ``p.np_result_type`` to convert each
        field and to ``p.Vector`` as its ``dtype``.

    Returns
    -------
    The ``p.Vector`` built from the second line.

    Raises
    ------
    Exception
        If the size of the vector differs from the count on the first line.
    """
    # Use a context manager so the file is closed even if parsing fails, and
    # build a real list: a Python 3 ``map`` object cannot be indexed.
    with open(fname) as fd:
        lines = [line.strip().split(" ") for line in fd]

    count = int(lines[0][0])
    convert = p.np_result_type(dtype).type  # hoisted out of the loop
    vector = p.Vector([convert(field) for field in lines[1]], dtype=dtype)
    if vector.size != count:
        raise Exception("Sizes %d and %d do not match!" % (vector.size, count))
    return vector
def test_kernel(*args, **kwargs):
    """
    Compare PyViennaCL solves against the host reference solvers.

    A, A_trans, B, B_trans must be numpy array or matrix instances

    Positional arguments (all required):
      args[0] -- epsilon: largest accepted value of ``diff`` between the
                 host and the device solution
      args[1] -- 8 host matrices: upper, unit upper, lower, unit lower for
                 A, followed by the same four for A^T
      args[2] -- (B, B_trans) host right-hand sides
      args[3] -- 8 device matrices, in the same order as args[1]
      args[4] -- (vcl_B, vcl_B_trans) device right-hand sides

    Returns ``os.EX_OK`` once every comparison has passed. Raises
    RuntimeError at the first comparison whose difference exceeds epsilon.
    """
    epsilon = args[0]
    (A_upper, A_unit_upper, A_lower, A_unit_lower,
     A_trans_upper, A_trans_unit_upper,
     A_trans_lower, A_trans_unit_lower) = args[1]
    B, B_trans = args[2]
    (vcl_A_upper, vcl_A_unit_upper, vcl_A_lower, vcl_A_unit_lower,
     vcl_A_trans_upper, vcl_A_trans_unit_upper,
     vcl_A_trans_lower, vcl_A_trans_unit_lower) = args[3]
    vcl_B, vcl_B_trans = args[4]

    Bvec = B[::, 0]
    vcl_Bvec = p.Vector(vcl_B.value[::, 0])  # TODO: get rid of .value
    if not (Bvec == vcl_Bvec).all():
        print(Bvec)
        print(vcl_Bvec)
        raise RuntimeError("Failed creating B vector")

    # One entry per triangular layout: the name used in the messages, the
    # factory of the PyViennaCL solver tag, and the extra keyword arguments
    # handed to the host solver.
    layouts = (
        ("upper", p.upper_tag, {}),
        ("unit upper", p.unit_upper_tag, {}),
        ("lower", p.lower_tag, {"lower": True}),
        ("unit lower", p.unit_lower_tag, {"lower": True}),
    )
    host_A = (A_upper, A_unit_upper, A_lower, A_unit_lower)
    host_A_trans = (A_trans_upper, A_trans_unit_upper,
                    A_trans_lower, A_trans_unit_lower)
    dev_A = (vcl_A_upper, vcl_A_unit_upper, vcl_A_lower, vcl_A_unit_lower)
    dev_A_trans = (vcl_A_trans_upper, vcl_A_trans_unit_upper,
                   vcl_A_trans_lower, vcl_A_trans_unit_lower)

    # The backslashes are escaped explicitly: "\ " is an invalid escape
    # sequence, which newer Python versions warn about. The text produced
    # at runtime is unchanged.
    systems = (
        ("A \\ B", host_A, dev_A, B, vcl_B),
        ("A^T \\ B", host_A_trans, dev_A_trans, B, vcl_B),
        ("A \\ B^T", host_A, dev_A, B_trans, vcl_B_trans),
        ("A^T \\ B^T", host_A_trans, dev_A_trans, B_trans, vcl_B_trans),
    )

    for system, host_mats, dev_mats, rhs, vcl_rhs in systems:
        for (layout, make_tag, host_kwargs), host_mat, dev_mat in zip(
                layouts, host_mats, dev_mats):
            vcl_X = p.solve(dev_mat, vcl_rhs, make_tag())
            X = sp.solve(host_mat, rhs, **host_kwargs)
            act_diff = math.fabs(diff(X, vcl_X))
            what = "solving %s for %s triangular A" % (system, layout)
            if act_diff > epsilon:
                raise RuntimeError("Failed %s: %s" % (what, act_diff))
            print("Test passed: %s: %s" % (what, act_diff))

    # GMRES
    gmres_tolerance = epsilon / 10
    vcl_X = p.solve(vcl_A_upper, vcl_Bvec,
                    p.gmres_tag(tolerance=gmres_tolerance))
    X, _info = spsp.gmres(A_upper, Bvec, tol=gmres_tolerance)
    act_diff = math.fabs(diff(X, vcl_X))
    if act_diff > epsilon:
        raise RuntimeError("Failed solving A \\ b using GMRES: %s" % act_diff)
    print("Test passed: solving A \\ b using GMRES: %s" % act_diff)

    # TODO: CG -- need a symmetric positive definite matrix for test
    # TODO: BiCGStab -- need a non-symmetric matrix for test
    # TODO: in-place solvers

    return os.EX_OK
#!python """ PyViennaCL allows you to access and manipulate submatrices and subvectors using the usual Pythonic apparatus of slices and ranges of objects. ViennaCL provides object proxies to allow us to do these sub-manipulations in place. Here, we give some basic examples. """ import pyviennacl as p import numpy as np # Create some small, simple Vector and Matrix instances x = p.Vector(6, 1.0) a = p.Matrix(6, 6, 1.0) print("x is %s" % x) print("a is\n%s" % a) # Scale the first half of the Vector x x[0:3] *= 2.0 # Show the new x print("x is now %s" % x) # Create a smaller matrix from a submatrix of a b = a[3:6, 3:6] * 4.0 # Set the upper-left corner of the matrix to 4.0s a[0:3, 0:3] = b
If you are familiar with NumPy, you might need about 5 minutes to grasp the content here. If you are not so familiar, you might need 10 minutes. """ # Import PyViennaCL and NumPy import pyviennacl as p import numpy as np # Create our datastructures on the host x = [1.0, 2.0, 3.0, 4.0, 5.0] # We can create PyViennaCL Vectors from lists a = np.array([[1.0, 2.0, 3.0], [0.0, 3.0, 4.0], [0.0, 0.0, 5.0]]) # We can create PyViennaCL Matrices from arrays # Create corresponding ViennaCL datastructures on the compute device y = p.Vector() b = p.Matrix(a) # This is a dense matrix # Copy the data back to the host and check that it's equal z = y.value # z is now a 1-D numpy array with dtype float64 c = b.value # c is now a 2-D numpy array with dtype float64 if (z == x).all() and (c == a).all(): print("Successfully transferred data to and from the compute device!") # We can modify elements of the ViennaCL device structures, but since this # incurs a compute kernel initialisation and buffer transfer, it is very slow! print('1111111') y[0] = float(0.0) print('22222222') b[2, 2] = float(-1.0)
def test_scalar(epsilon, dtype):
    """
    Exercise PyViennaCL device and host scalars.

    Basic arithmetic:
    + add, sub, mul, div

    Scalar result types:
    Host and Device scalars

    ``dtype`` is called to build the random operands; ``epsilon`` is the
    largest accepted value of (first result - second result) in each
    arithmetic comparison. Raises RuntimeError at the first failed check.
    """
    a = dtype(random.random())
    b = dtype(random.random())
    c = dtype(random.random())
    d = dtype(random.random())

    alpha = p.Scalar(a)
    beta = p.Scalar(b)
    gamma = p.HostScalar(c)
    delta = p.HostScalar(d)

    initialised = (
        (alpha, a, "device"),
        (beta, b, "device"),
        (gamma, c, "host"),
        (delta, d, "host"),
    )
    for wrapped, raw, kind in initialised:
        if not wrapped == raw:
            raise RuntimeError("Failed to initialise %s scalar" % kind)
    print("Test: scalar initialisation passed")

    ### Test copy
    device_copy = alpha.copy()
    if device_copy != a:
        raise RuntimeError("Failed to copy device scalar")
    host_copy = gamma.copy()
    if host_copy != c:
        raise RuntimeError("Failed to copy host scalar")
    print("Test: scalar copy passed")

    ### Test inter-type initialisation
    device_from_host_copy = p.Scalar(host_copy)
    if device_from_host_copy != c:
        raise RuntimeError(
            "Failed to initialise device scalar from copied host scalar")
    host_from_device = p.HostScalar(beta)
    if host_from_device != b:
        raise RuntimeError(
            "Failed to initialise host scalar from device scalar")
    host_from_device_copy = p.HostScalar(device_copy)
    if host_from_device_copy != a:
        raise RuntimeError(
            "Failed to initialise host scalar from copied device scalar")
    device_from_host = p.Scalar(delta)
    if device_from_host != d:
        raise RuntimeError(
            "Failed to initialise device scalar from host scalar")
    print("Test: inter-type scalar initialisation passed")

    ### pyvcl type arithmetic
    # ``lhs`` repeats on the raw values what ``rhs`` does on the wrapped
    # scalars. The statements stay interleaved because ``lhs //= rhs`` has
    # to see ``rhs`` before ``rhs //= rhs`` is applied.
    # NOTE(review): the comparisons below are one-sided (no absolute value),
    # so a large negative difference is accepted -- confirm this is intended.
    lhs = (a / c) + (b ** a) * (c - d) // b
    rhs = (alpha / gamma) + (beta ** alpha) * (gamma - delta) // beta
    lhs += a
    rhs += alpha
    lhs -= b
    rhs -= beta
    lhs *= c
    rhs *= gamma
    lhs /= d
    rhs /= delta
    lhs **= dtype(2)
    rhs **= p.HostScalar(dtype(2))
    lhs //= rhs
    rhs //= rhs
    if (lhs - rhs) > epsilon:
        raise RuntimeError("Failed basic arithmetic test")
    print("Test: basic arithmetic passed")

    ### Inter-type arithmetic
    lhs = (a / gamma) + (b ** alpha) * (c - delta) // beta
    rhs = (alpha / c) + (beta ** a) * (gamma - d) // b
    lhs = p.Scalar(lhs, dtype=dtype)
    lhs += alpha
    rhs += a
    lhs -= b
    rhs -= beta
    lhs *= gamma
    rhs *= c
    lhs /= d
    rhs /= delta
    lhs **= p.HostScalar(dtype(2))
    rhs **= dtype(2)
    if (lhs - rhs) > epsilon:
        raise RuntimeError("Failed inter-type arithmetic test")
    print("Test: inter-type arithmetic passed")

    ### Scalar result type arithmetic
    # + Norm_1, Norm_2, Norm_Inf
    # + Element* operations?
    # + Dot (ie, inner product)
    vec = p.Vector([lhs, a, beta, c, delta, rhs], dtype=dtype)
    r1 = vec.norm(1)
    r2 = vec.norm(2)
    r3 = vec.norm(p.inf)
    r4 = vec.dot(vec * alpha)
    R1, R2, R3, R4 = r1.value, r2.value, r3.value, r4.value
    lhs = (r1 * a + r2 * beta - r3 / c - r4 // delta) * (r1 + r4 - R3)
    rhs = (R1 * a + R2 * beta - R3 / c - R4 // delta) * (R1 + R4 - r3)
    if (lhs - rhs) > epsilon:
        raise RuntimeError("Failed scalar result type arithmetic test")
    print("Test: scalar result type arithmetic passed")
If you are familiar with NumPy, you might need about 5 minutes to grasp the content here. If you are not so familiar, you might need 10 minutes. """ # Import PyViennaCL and NumPy import pyviennacl as p import numpy as np # Create our datastructures on the host x = [1.0, 2.0, 3.0, 4.0, 5.0] # We can create PyViennaCL Vectors from lists a = np.array([[1.0, 2.0, 3.0], [0.0, 3.0, 4.0], [0.0, 0.0, 5.0]]) # We can create PyViennaCL Matrices from arrays # Create corresponding ViennaCL datastructures on the compute device y = p.Vector(x) b = p.Matrix(a) # This is a dense matrix # Copy the data back to the host and check that it's equal z = y.value # z is now a 1-D numpy array with dtype float64 c = b.value # c is now a 2-D numpy array with dtype float64 if (z == x).all() and (c == a).all(): print("Successfully transferred data to and from the compute device!") # We can modify elements of the ViennaCL device structures, but since this # incurs a compute kernel initialisation and buffer transfer, it is very slow! y[0] = 0.0 b[2, 2] = -1.0 x[0] = 0.0 # We should also modify our original data to keep track..
def run_test(*args, **kwargs):
    """
    Check PyViennaCL dense-matrix operations against NumPy.

    A, A_trans, B, B_trans must be numpy array or matrix instances

    Positional arguments (eleven are expected):
      args[0]  -- epsilon: largest accepted value of ``diff`` between the
                  NumPy and the PyViennaCL result
      args[1]  -- A: host matrix, used only for its ``shape``
      args[6]  -- vcl_A, args[8] -- vcl_B, args[10] -- vcl_C: device
                  matrices the operations are run on
      the remaining positions (A_trans, B, B_trans, C, vcl_A_trans,
      vcl_B_trans) are not used by this test.

    Keyword arguments:
      dtype -- callable producing the element type of the test scalars

    Returns ``os.EX_OK`` once every check has passed. Raises RuntimeError
    at the first failing check.
    """
    epsilon = args[0]
    A = args[1]
    vcl_A = args[6]
    vcl_B = args[8]
    vcl_C = args[10]

    dtype = kwargs['dtype']

    alpha = p.Scalar(dtype(3.1415))
    beta = p.HostScalar(dtype(2.718))

    def check_equal(name, lhs, rhs):
        # Exact element-wise comparison; operand order is kept as given.
        if not (lhs == rhs).all():
            raise RuntimeError("Failed: %s" % name)
        print("Test: %s passed" % name)

    def check_close(name, first, second):
        # Comparison through the module's ``diff`` helper, within epsilon.
        act_diff = math.fabs(diff(first, second))
        if act_diff > epsilon:
            raise RuntimeError("Failed: %s" % name)
        print("Test: %s passed" % name)

    # Test initialisers
    # + GPU scalar TODO

    # + CPU scalar TODO
    Y = p.Matrix(A.shape, beta.value)  # TODO
    if not (Y == (np.ones(A.shape, dtype=dtype) * beta.value)).all():
        raise RuntimeError("Failed: CPU scalar matrix init")
    print("Test: initialisation of matrix with CPU scalar passed")

    # + ndarray
    X = p.Matrix(np.ones(A.shape, dtype=dtype) * beta.value)
    if not (X == (np.ones(A.shape, dtype=dtype) * beta.value)).all():
        raise RuntimeError("Failed: ndarray matrix init")
    print("Test: initialisation of matrix with ndarray passed")

    # + Matrix
    X = p.Matrix(Y)
    if not (X == Y).all():
        raise RuntimeError("Failed: Matrix Matrix init")
    print("Test: initialisation of matrix with Matrix passed")

    # + CompressedMatrix -- TODO: sparse matrices + dtypes

    # In-place add
    X = vcl_A.value
    X += vcl_B.value
    vcl_A += vcl_B
    check_equal("in-place add", vcl_A, X)

    # Scaled in-place add (X carries on from the previous check)
    X += alpha.value * vcl_B.value
    vcl_A += alpha * vcl_B
    check_equal("scaled in-place add", vcl_A, X)

    # Add
    check_equal("add", vcl_A.value + vcl_B.value, vcl_A + vcl_B)

    # Scaled add
    check_close("scaled add (left)",
                dtype(alpha.value) * vcl_B.value + vcl_C.value,
                alpha * vcl_B + vcl_C)
    # The failure message of this check used to say "(left)".
    check_close("scaled add (right)",
                vcl_B.value + dtype(alpha.value) * vcl_C.value,
                vcl_B + alpha * vcl_C)
    check_close("scaled add (both)",
                alpha.value * vcl_B.value + alpha.value * vcl_C.value,
                alpha * vcl_B + alpha * vcl_C)

    # In-place sub
    X = vcl_A.value
    X -= vcl_B.value
    vcl_A -= vcl_B
    check_equal("in-place sub", vcl_A, X)

    # Scaled in-place sub (X carries on from the previous check)
    X -= alpha.value * vcl_B.value
    vcl_A -= alpha * vcl_B
    check_equal("scaled in-place sub", vcl_A, X)

    # Sub
    check_equal("sub", vcl_A.value - vcl_B.value, vcl_A - vcl_B)

    # Scaled sub
    check_close("scaled sub (left)",
                alpha.value * vcl_B.value - vcl_C.value,
                alpha * vcl_B - vcl_C)
    check_close("scaled sub (right)",
                vcl_B.value - alpha.value * vcl_C.value,
                vcl_B - alpha * vcl_C)
    check_close("scaled sub (both)",
                alpha.value * vcl_B.value - alpha.value * vcl_C.value,
                alpha * vcl_B - alpha * vcl_C)

    # Scalar multiplication (CPU scalar) -- not supported yet
    # Scalar multiplication (GPU scalar)

    # Matrix-vector multiplication
    vec = p.Vector(vcl_A.shape[0], 3.1415, dtype=dtype)
    check_close("matrix-vector multiplication",
                vcl_A * vec, vcl_A.value.dot(vec.value))

    # Matrix divided by scalar
    check_close("matrix-scalar division",
                vcl_A.value / alpha.value, vcl_A / alpha)

    # Binary elementwise operations -- prod, pow and div
    check_close("elementwise matrix-matrix multiplication",
                vcl_A.value * vcl_B.value, p.ElementProd(vcl_A, vcl_B))
    check_close("elementwise matrix-matrix exponentiation",
                vcl_A.value**vcl_B.value, vcl_A**vcl_B)
    check_close("elementwise matrix-matrix division",
                vcl_A.value / vcl_B.value, p.ElementDiv(vcl_A, vcl_B))

    # Unary elementwise operations: message name, NumPy reference function,
    # PyViennaCL operation.
    # - abs TODO
    unary_ops = (
        ("acos", np.arccos, p.ElementAcos),
        ("asin", np.arcsin, p.ElementAsin),
        ("atan", np.arctan, p.ElementAtan),
        ("ceil", np.ceil, p.ElementCeil),
        ("cos", np.cos, p.ElementCos),
        ("cosh", np.cosh, p.ElementCosh),
        ("exp", np.exp, p.ElementExp),
        ("fabs", np.fabs, p.ElementFabs),
        ("floor", np.floor, p.ElementFloor),
        ("log", np.log, p.ElementLog),
        ("log10", np.log10, p.ElementLog10),
        ("sin", np.sin, p.ElementSin),
        ("sinh", np.sinh, p.ElementSinh),
        ("sqrt", np.sqrt, p.ElementSqrt),
        ("tan", np.tan, p.ElementTan),
        ("tanh", np.tanh, p.ElementTanh),
    )
    for op_name, host_func, vcl_op in unary_ops:
        # TODO: taking .result explicitly should not be required
        check_close("elementwise %s" % op_name,
                    host_func(vcl_A.value), vcl_op(vcl_A).result)

    # - trans TODO ?!!!
    # - norm1 -- TODO ONLY FOR VECTORS
    # - norm2 -- TODO ONLY FOR VECTORS
    # - norm_inf -- TODO ONLY FOR VECTORS

    return os.EX_OK
def run_test(*args, **kwargs):
    """
    Check PyViennaCL vector operations against NumPy.

    A and B must be numpy array or matrix instances with one dimension

    Positional arguments (seven are expected):
      args[0] -- epsilon: largest accepted difference between the NumPy
                 and the PyViennaCL result
      args[1] -- A: host vector, used only for its ``shape``
      args[4] -- vcl_A, args[5] -- vcl_B, args[6] -- vcl_C: device vectors
                 the operations are run on
      args[2] and args[3] (B, C) are not used by this test.

    Keyword arguments:
      dtype -- anything ``np.result_type`` accepts; its scalar type is
               used to build the test scalars

    Returns ``os.EX_OK`` once every check has passed. Raises RuntimeError
    at the first failing check. A norm(1) mismatch is only printed, not
    raised.
    """
    epsilon = args[0]
    A = args[1]
    vcl_A = args[4]
    vcl_B = args[5]
    vcl_C = args[6]

    dtype = np.result_type(kwargs['dtype']).type

    alpha = p.Scalar(dtype(3.1415))  ## TODO SHOULD BE GPU SCALAR
    beta = p.HostScalar(dtype(2.718))

    ###
    ### TODO MISSING:
    ### + cpu / gpu combos
    ### + elementwise power function?
    ###

    def check_equal(name, lhs, rhs):
        # Exact element-wise comparison; operand order is kept as given.
        if not (lhs == rhs).all():
            raise RuntimeError("Failed: %s" % name)
        print("Test: %s passed" % name)

    def check_close(name, first, second):
        # Comparison through the module's ``diff`` helper, within epsilon.
        act_diff = math.fabs(diff(first, second))
        if act_diff > epsilon:
            raise RuntimeError("Failed: %s" % name)
        print("Test: %s passed" % name)

    # Test initialisers
    # + GPU scalar TODO

    # + CPU scalar TODO
    Y = p.Vector(A.shape[0], beta.value)  # TODO
    if not (Y == (np.ones(A.shape, dtype=dtype) * beta.value)).all():
        raise RuntimeError("Failed: CPU scalar vector init")
    print("Test: initialisation of vector with CPU scalar passed")

    # + ndarray
    X = p.Vector(np.ones(A.shape, dtype=dtype) * beta.value)
    if not (X == (np.ones(A.shape, dtype=dtype) * beta.value)).all():
        raise RuntimeError("Failed: ndarray vector init")
    print("Test: initialisation of vector with ndarray passed")

    # + Vector
    X = p.Vector(Y)
    if not (X == Y).all():
        raise RuntimeError("Failed: Vector Vector init")
    print("Test: initialisation of vector with Vector passed")

    # Negation
    check_close("negation", -vcl_A, -vcl_A.value)

    # Inner product
    X = vcl_A.dot(vcl_B)
    Y = vcl_A.value.dot(vcl_B.value)
    act_diff = math.fabs(X - Y)
    if act_diff > 0.01:  # NB: numpy seems to be imprecise here
        raise RuntimeError("Failed: inner product of vectors")
    print("Test: inner product of vectors passed")

    # In-place scaling (multiplication by scalar)
    X = vcl_A.value
    X *= beta.value
    vcl_A *= beta
    check_close("in-place scale (multiplication)", X, vcl_A)

    # In-place scaling (division by scalar)
    X = vcl_A.value
    X /= alpha.value
    vcl_A /= alpha
    check_close("in-place scale (division)", X, vcl_A)

    # In-place add
    X = vcl_A.value
    X += vcl_B.value
    vcl_A += vcl_B
    check_close("in-place add", X, vcl_A)

    # Scaled in-place add (X carries on from the previous check)
    X += alpha.value * vcl_B.value
    vcl_A += alpha * vcl_B
    check_close("scaled in-place add", X, vcl_A)

    # Add
    check_close("add", vcl_A.value + vcl_B.value, vcl_A + vcl_B)

    # Scaled add (left) -- prints diagnostics before failing
    Y = dtype(alpha.value) * vcl_B.value + vcl_C.value
    Z = alpha * vcl_B + vcl_C
    act_diff = math.fabs(diff(Y, Z))
    if act_diff > epsilon:
        print(act_diff)
        print(Y, type(Y))
        print(Z, type(Z))
        print(Z - Y)
        raise RuntimeError("Failed: scaled add (left)")
    print("Test: scaled add (left) passed")

    # Scaled add (right); the failure message used to say "(left)".
    check_close("scaled add (right)",
                vcl_B.value + dtype(alpha.value) * vcl_C.value,
                vcl_B + alpha * vcl_C)

    # Scaled add (both)
    check_close("scaled add (both)",
                alpha.value * vcl_B.value + alpha.value * vcl_C.value,
                alpha * vcl_B + alpha * vcl_C)

    # In-place sub
    X = vcl_A.value
    X -= vcl_B.value
    vcl_A -= vcl_B
    check_equal("in-place sub", vcl_A, X)

    # Scaled in-place sub (X carries on from the previous check)
    X -= alpha.value * vcl_B.value
    vcl_A -= alpha * vcl_B
    check_equal("scaled in-place sub", vcl_A, X)

    # Sub
    check_equal("sub", vcl_A.value - vcl_B.value, vcl_A - vcl_B)

    # Scaled sub
    check_close("scaled sub (left)",
                alpha.value * vcl_B.value - vcl_C.value,
                alpha * vcl_B - vcl_C)
    check_close("scaled sub (right)",
                vcl_B.value - alpha.value * vcl_C.value,
                vcl_B - alpha * vcl_C)
    check_close("scaled sub (both)",
                alpha.value * vcl_B.value - alpha.value * vcl_C.value,
                alpha * vcl_B - alpha * vcl_C)

    # Scalar multiplication (CPU scalar) -- not supported yet
    # Scalar multiplication (GPU scalar)

    # Division by scalar. The messages below say "matrix" although the
    # operands are vectors; the text is kept unchanged.
    check_close("matrix-scalar division",
                vcl_A.value / alpha.value, vcl_A / alpha)

    # Binary elementwise operations -- prod and div
    check_close("elementwise matrix-matrix multiplication",
                vcl_A.value * vcl_B.value, p.ElementProd(vcl_A, vcl_B))
    check_close("elementwise matrix-matrix division",
                vcl_A.value / vcl_B.value, p.ElementDiv(vcl_A, vcl_B))

    # Unary elementwise operations: message name, NumPy reference function,
    # PyViennaCL operation.
    # - abs TODO
    unary_ops = (
        ("acos", np.arccos, p.ElementAcos),
        ("asin", np.arcsin, p.ElementAsin),
        ("atan", np.arctan, p.ElementAtan),
        ("ceil", np.ceil, p.ElementCeil),
        ("cos", np.cos, p.ElementCos),
        ("cosh", np.cosh, p.ElementCosh),
        ("exp", np.exp, p.ElementExp),
        ("fabs", np.fabs, p.ElementFabs),
        ("floor", np.floor, p.ElementFloor),
        ("log", np.log, p.ElementLog),
        ("log10", np.log10, p.ElementLog10),
        ("sin", np.sin, p.ElementSin),
        ("sinh", np.sinh, p.ElementSinh),
        ("sqrt", np.sqrt, p.ElementSqrt),
        ("tan", np.tan, p.ElementTan),
        ("tanh", np.tanh, p.ElementTanh),
    )
    for op_name, host_func, vcl_op in unary_ops:
        # TODO: taking .result explicitly should not be required
        check_close("elementwise %s" % op_name,
                    host_func(vcl_A.value), vcl_op(vcl_A).result)

    # - norm1
    X = np.linalg.norm(vcl_A.value, 1)
    Y = p.norm(vcl_A, 1)  # or vcl_A.norm(1)
    act_diff = math.fabs(X - Y)
    if act_diff > epsilon:
        # Non-fatal on purpose: the raise was already disabled in this
        # test, so a mismatch only dumps the vector and "passed" is still
        # printed.
        print(vcl_A)
    print("Test: norm(1) passed")

    # - norm2
    X = np.linalg.norm(vcl_A.value, 2)
    Y = vcl_A.norm(2)
    act_diff = math.fabs(X - Y)
    if act_diff > epsilon:
        raise RuntimeError("Failed: norm(2)")
    print("Test: norm(2) passed")

    # - norm_inf
    X = np.linalg.norm(vcl_A.value, np.inf)
    Y = vcl_A.norm(np.inf)
    act_diff = math.fabs(X - Y)
    if act_diff > epsilon:
        raise RuntimeError("Failed: norm(inf)")
    print("Test: norm(inf) passed")

    # in-place multiply-division-add
    X = vcl_C.value
    X += alpha.value * vcl_A.value + vcl_B.value / beta.value
    vcl_C += alpha * vcl_A + vcl_B / beta
    check_close("in-place multiply-division-add", X, vcl_C)

    # lengthy sum of scaled vectors
    check_close(
        "lengthy sum of scaled vectors",
        alpha.value * vcl_A.value - vcl_B.value / beta.value
        + vcl_A.value * beta.value - vcl_B.value / alpha.value
        + vcl_C.value,
        alpha * vcl_A - vcl_B / beta + vcl_A * beta - vcl_B / alpha + vcl_C)

    # sub-expression
    X = vcl_A.value + (((vcl_C.value + vcl_B.value) * alpha.value)
                       - vcl_B.value) / beta.value
    Y = vcl_A + (((vcl_C + vcl_B) * alpha) - vcl_B) / beta
    act_diff = math.fabs(diff(X, Y))
    if act_diff > epsilon:
        # The "%s" placeholder was previously left unformatted.
        raise RuntimeError(
            "Failed: vector sub-expression test %s" % act_diff)
    print("Test: vector sub-expression passed")

    # plane rotation
    V = (alpha * vcl_A + beta * vcl_B).result
    W = (alpha * vcl_B - beta * vcl_A).result
    p.plane_rotation(vcl_A, vcl_B, alpha, beta)
    act_diffB = math.fabs(diff(W, vcl_B))
    act_diffA = math.fabs(diff(V.value, vcl_A.value))
    if act_diffA > epsilon or act_diffB > epsilon:
        print(act_diffA, act_diffB)
        print(vcl_A)
        print(V)
        print(p.ElementFabs(V - vcl_A))
        raise RuntimeError("Failed: plane rotation")
    print("Test: plane rotation passed")

    return os.EX_OK
# We want a square N x N system.
N = 5

# Build the system matrix on the host as a float32 NumPy array: start from
# all zeros, then give every entry on or above the diagonal a random value.
A = np.zeros((N, N), dtype = np.float32)
for i in range(N):
    for j in range(i, N):
        A[i, j] = np.float32(random.randint(0,1000) / 100.0)

# Move the system matrix over to the compute device
A = p.Matrix(A)
print("A is\n%s" % A)

# Random float32 right-hand side, created on the host and transferred to
# the compute device
b = p.Vector(np.random.rand(N).astype(np.float32))
print("b is %s" % b)

# Solve the system; the tag tells the solver that A is upper triangular
x = p.solve(A, b, p.upper_tag())

# Display the solution
print("Solution of Ax = b for x:\n%s" % x)
import pyviennacl as p
import random

# First, we create an empty 5 x 5 CompressedMatrix:
A = p.CompressedMatrix(5, 5)

# Let's set some random values of A.
#
# Importantly, setting individual elements of a PyViennaCL sparse matrix is
# not nearly as expensive as setting individual elements of a dense matrix or
# vector, since in the sparse matrix case, the elements are cached on the host
# and only transferred to the device when they are needed for some computation.
#
# NOTE(review): randrange(0, 4, 1) only produces 0..3, so row 4 and column 4
# of the 5 x 5 matrix are never written -- confirm whether that is intended.
# The same (x, y) pair can also be drawn more than once, so fewer than six
# distinct entries may end up set.
for i in range(6):
    x = random.randrange(0, 4, 1)
    y = random.randrange(0, 4, 1)
    A[x, y] = random.random()

print("A is:\n%s" % A.value)

# Now, let's construct a simple vector of 5 elements.
b = p.Vector(5, 3.142)

print("b is %s" % b)

# Now, represent the product:
c = A * b

# And the result is only computed when we need to print it:
print("A * b = c is %s" % c)
Here, we demonstrate the different notation for these products. """ import pyviennacl as p import numpy as np # Let's construct some random 1-D and 2-D arrays v = np.random.rand(5) w = np.random.rand(5) f = np.random.rand(5, 5) g = np.random.rand(5, 5) # Now transfer them to the compute device x, y = p.Vector(v), p.Vector(w) a, b = p.Matrix(f), p.Matrix(g) print("a is\n%s" % a) print("b is\n%s" % b) print("x is %s" % x) print("y is %s" % y) # # Scaling # # Represent the scaling of x by 2.0 z = x * 2.0