def compute_analysis_cuda2(self, xb, y, R, P, H, HT=None, hph=None, calcP=True):
    if HT is None:
        HT = culinalg.transpose(H)
    HP = culinalg.dot(H, P)
    if hph is None:
        hph = culinalg.dot(HP, HT)
    Rhph = misc.add(R, hph)
    inv = culinalg.inv(Rhph)
    W = culinalg.dot(HP, inv, transa='T')
    Hxb = culinalg.dot(H, xb)
    yHxb = misc.subtract(y, Hxb)
    WyHxb = culinalg.dot(W, yHxb)
    xhat = misc.add(xb, WyHxb)
    # xhat = xb + culinalg.dot(W, (y - culinalg.dot(H, xb)))
    if calcP:
        I = culinalg.eye(P.shape[0])
        WH = culinalg.dot(W, H)
        IWH = I - WH
        Phat = culinalg.dot(IWH, P)
    else:
        Phat = misc.zeros((1, ), dtype=P.dtype)
    return xhat, Phat
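For reference, the routine above evaluates the standard Kalman analysis update xhat = xb + W (y - H xb) with gain W = P H^T (R + H P H^T)^{-1} on the GPU. Below is a minimal NumPy sketch of the same update; the shapes and random inputs are assumptions for illustration only, not taken from the original code.

# Minimal NumPy sketch of the analysis update (assumed shapes and data).
import numpy as np

n, m = 4, 2                      # state and observation dimensions (assumed)
rng = np.random.default_rng(0)
xb = rng.normal(size=n)          # background state
P = np.eye(n)                    # background covariance
H = rng.normal(size=(m, n))      # observation operator
R = np.eye(m)                    # observation error covariance
y = rng.normal(size=m)           # observations

S = R + H @ P @ H.T              # innovation covariance, R + H P H^T
W = P @ H.T @ np.linalg.inv(S)   # Kalman gain
xhat = xb + W @ (y - H @ xb)     # analysis mean
Phat = (np.eye(n) - W @ H) @ P   # analysis covariance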
def impl_test_binaryop_2d(self, dtype):
    if issubclass(dtype, numbers.Integral):
        a_sca = np.array(np.random.randint(1, 10), dtype=dtype)
        b_sca = np.array(np.random.randint(1, 10), dtype=dtype)
        a_vec = np.random.randint(1, 10, 3).astype(dtype)
        b_vec = np.random.randint(1, 10, 3).astype(dtype)
        a_mat = np.random.randint(1, 10, 6).reshape((3, 2)).astype(dtype)
        b_mat = np.random.randint(1, 10, 6).reshape((3, 2)).astype(dtype)
    else:
        a_sca = np.random.normal(scale=5.0, size=()).astype(dtype)
        b_sca = np.random.normal(scale=5.0, size=()).astype(dtype)
        a_vec = np.random.normal(scale=5.0, size=(3,)).astype(dtype)
        b_vec = np.random.normal(scale=5.0, size=(3,)).astype(dtype)
        a_mat = np.random.normal(scale=5.0, size=(3, 2)).astype(dtype)
        b_mat = np.random.normal(scale=5.0, size=(3, 2)).astype(dtype)

    a_sca_gpu = gpuarray.to_gpu(a_sca)
    b_sca_gpu = gpuarray.to_gpu(b_sca)
    a_vec_gpu = gpuarray.to_gpu(a_vec)
    b_vec_gpu = gpuarray.to_gpu(b_vec)
    a_mat_gpu = gpuarray.to_gpu(a_mat)
    b_mat_gpu = gpuarray.to_gpu(b_mat)

    # addition
    assert np.allclose(misc.add(a_sca_gpu, b_sca_gpu).get(), a_sca + b_sca)
    assert np.allclose(misc.add(a_vec_gpu, b_vec_gpu).get(), a_vec + b_vec)
    assert np.allclose(misc.add(a_mat_gpu, b_mat_gpu).get(), a_mat + b_mat)

    # subtraction
    assert np.allclose(misc.subtract(a_sca_gpu, b_sca_gpu).get(), a_sca - b_sca)
    assert np.allclose(misc.subtract(a_vec_gpu, b_vec_gpu).get(), a_vec - b_vec)
    assert np.allclose(misc.subtract(a_mat_gpu, b_mat_gpu).get(), a_mat - b_mat)

    # multiplication
    assert np.allclose(misc.multiply(a_sca_gpu, b_sca_gpu).get(), a_sca * b_sca)
    assert np.allclose(misc.multiply(a_vec_gpu, b_vec_gpu).get(), a_vec * b_vec)
    assert np.allclose(misc.multiply(a_mat_gpu, b_mat_gpu).get(), a_mat * b_mat)

    # division
    assert np.allclose(misc.divide(a_sca_gpu, b_sca_gpu).get(), a_sca / b_sca)
    assert np.allclose(misc.divide(a_vec_gpu, b_vec_gpu).get(), a_vec / b_vec)
    assert np.allclose(misc.divide(a_mat_gpu, b_mat_gpu).get(), a_mat / b_mat)
def thunk():
    alpha = gpuarray.to_gpu(np.squeeze(np.asarray(inputs[0]))[:, None])
    x_t = gpuarray.to_gpu(np.asarray(inputs[1])[0, :, :])
    x_f = gpuarray.to_gpu(np.asarray(inputs[2])[0, :, :])
    Xt = cumath.exp(misc.add(linalg.dot(x_t, A), b))
    Xf = cumath.exp(misc.add(linalg.dot(x_f, A), b))
    Xtn = misc.sum(Xt, axis=1, keepdims=True)
    Xfn = misc.sum(Xf, axis=1, keepdims=True)
    Xt = misc.divide(Xt, Xtn)
    Xf = misc.divide(Xf, Xfn)
    w = misc.multiply(Xt, alpha) + misc.multiply(Xf, 1 - alpha)
    dq = Xt - Xf
    qdw = dq / w
    t1 = misc.sum(x * qdw, axis=1)
    f = 2 * depth + self.base.n
    t2 = f * misc.sum(dq, axis=1) / misc.sum(w, axis=1)
    t3 = misc.sum(x, axis=1) * misc.sum(qdw, axis=1)
    dalpha = t1 - t2 + t3
    del dq, t1, f, t2, t3

    iw = 1 / w
    S1 = misc.multiply(depth[:, None] * (self.base.n - 1) / self.base.n, iw)
    S2 = (self.base.n + depth[:, None]) / cumath.log(
        misc.sum(w, axis=1, keepdims=True))
    F = misc.multiply(misc.subtract((x * iw) - S1, S2), alpha)
    del w, iw, S1, S2

    cast = gpuarray.zeros((x_t.shape[1], Xt.shape[1]),
                          dtype=theano.config.floatX)
    dLq_t = gpuarray.zeros(x_t.shape, dtype=theano.config.floatX)
    dLq_f = gpuarray.zeros(x_f.shape, dtype=theano.config.floatX)
    for i in range(Xt.shape[0]):
        S1 = misc.multiply(Xt[None, i, :], A)
        S2 = misc.sum(S1, axis=1, keepdims=True)
        S2 = misc.multiply(S2, misc.add(Xt[None, i, :], cast))
        dLq_t[i, :] = misc.sum(misc.multiply(F[None, i, :], S1 - S2), axis=1)

        S1 = misc.multiply(Xf[None, i, :], A)
        S2 = misc.sum(S1, axis=1, keepdims=True)
        S2 = misc.multiply(S2, misc.add(Xf[None, i, :], cast))
        dLq_f[i, :] = misc.sum(misc.multiply(F[None, i, :], S1 - S2), axis=1)

    outputs[0][0] = dalpha.get()
    outputs[1][0] = dLq_t.get()
    outputs[2][0] = dLq_f.get()
    for v in node.outputs:
        compute_map[v][0] = True
def thunk():
    alpha = gpuarray.to_gpu(np.squeeze(np.asarray(inputs[0]))[:, None])
    x_t = gpuarray.to_gpu(np.asarray(inputs[1])[0, :, :])
    x_f = gpuarray.to_gpu(np.asarray(inputs[2])[0, :, :])
    Xt = cumath.exp(misc.add(linalg.dot(x_t, A), b))
    Xf = cumath.exp(misc.add(linalg.dot(x_f, A), b))
    Xtn = misc.sum(Xt, axis=1, keepdims=True)
    Xfn = misc.sum(Xf, axis=1, keepdims=True)
    Xt = misc.divide(Xt, Xtn)
    Xf = misc.divide(Xf, Xfn)
    w = misc.multiply(Xt, alpha) + misc.multiply(Xf, 1 - alpha)
    wp = cumath.log(w)
    wpn = misc.sum(wp, axis=1, keepdims=True) / self.n
    wp = misc.subtract(wp, wpn)
    t1 = misc.sum(x * wp, axis=1)
    t2 = (self.n + depth) * cumath.log(misc.sum(w, axis=1))
    t3 = depth * wpn
    outputs[0][0] = misc.sum(t1 - t2 + t3).get()
    for v in node.outputs:
        compute_map[v][0] = True
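The first half of this thunk row-normalizes exp(x·A + b) into two softmax distributions and mixes them with weight alpha. A small NumPy sketch of that normalization and mixing step follows; A, b, alpha and the shapes are illustrative assumptions, not the values used above.

# Minimal NumPy sketch of the softmax normalization and alpha-weighted mixture.
import numpy as np

rng = np.random.default_rng(0)
x_t = rng.normal(size=(5, 4))          # first feature block (assumed shape)
x_f = rng.normal(size=(5, 4))          # second feature block (assumed shape)
A = rng.normal(size=(4, 3))
b = rng.normal(size=(3,))
alpha = rng.uniform(size=(5, 1))       # per-row mixing weight

Xt = np.exp(x_t @ A + b)
Xf = np.exp(x_f @ A + b)
Xt /= Xt.sum(axis=1, keepdims=True)    # row-wise softmax of x_t @ A + b
Xf /= Xf.sum(axis=1, keepdims=True)    # row-wise softmax of x_f @ A + b
w = alpha * Xt + (1 - alpha) * Xf      # per-row mixture of the two softmaxes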
def __radd__(self, other):
    return cumisc.add(other, self)

def __rsub__(self, other):
    return cumisc.subtract(other, self)
def __add__(self, other):
    return cumisc.add(self, other)

def __sub__(self, other):
    return cumisc.subtract(self, other)
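These overloads, together with the __radd__/__rsub__ pair above, delegate Python arithmetic to skcuda.misc. A minimal sketch of the kind of wrapper class they might belong to is shown below; GpuBox is a hypothetical name, and operands are assumed to be same-shaped arrays.

# Hypothetical wrapper class sketch; not the original author's class.
import numpy as np
import pycuda.autoinit            # noqa: F401  (creates the CUDA context)
import pycuda.gpuarray as gpuarray
import skcuda.misc as cumisc


class GpuBox(object):
    """Hypothetical wrapper that holds a GPUArray and delegates arithmetic."""

    def __init__(self, data):
        # accept either a GPUArray or a host array
        self.data = data if isinstance(data, gpuarray.GPUArray) \
            else gpuarray.to_gpu(np.asarray(data))

    @staticmethod
    def _unwrap(other):
        return other.data if isinstance(other, GpuBox) \
            else gpuarray.to_gpu(np.asarray(other))

    def __add__(self, other):
        return GpuBox(cumisc.add(self.data, self._unwrap(other)))

    def __radd__(self, other):
        return GpuBox(cumisc.add(self._unwrap(other), self.data))

    def __sub__(self, other):
        return GpuBox(cumisc.subtract(self.data, self._unwrap(other)))

    def __rsub__(self, other):
        return GpuBox(cumisc.subtract(self._unwrap(other), self.data))


# usage: host arrays are uploaded once, then arithmetic stays on the device
a = GpuBox(np.arange(6, dtype=np.float32).reshape(3, 2))
b = GpuBox(np.ones((3, 2), dtype=np.float32))
print((a + b).data.get())
print((a - b).data.get())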
def add(a, b):
    '''Calculates matrix addition "a+b" on the GPU.'''
    a_gpu = gpuarray.to_gpu(a)
    b_gpu = gpuarray.to_gpu(b)
    return misc.add(a_gpu, b_gpu)
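Since the wrapper above is just a thin layer over skcuda.misc.add, a short self-contained usage sketch may be helpful; it assumes a working CUDA device with pycuda and scikit-cuda installed.

# Standalone usage sketch of skcuda.misc.add (assumes CUDA + scikit-cuda).
import numpy as np
import pycuda.autoinit            # noqa: F401  (creates the CUDA context)
import pycuda.gpuarray as gpuarray
import skcuda.misc as misc

misc.init()                       # initialize scikit-cuda's helper state

a = np.arange(6, dtype=np.float32).reshape(3, 2)
b = np.ones((3, 2), dtype=np.float32)

a_gpu = gpuarray.to_gpu(a)
b_gpu = gpuarray.to_gpu(b)
c_gpu = misc.add(a_gpu, b_gpu)    # elementwise a + b computed on the device

print(np.allclose(c_gpu.get(), a + b))   # True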
def _impl_test_binaryop_2d(self, dtype):
    if issubclass(dtype, numbers.Integral):
        a_sca = np.array(np.random.randint(1, 10), dtype=dtype)
        b_sca = np.array(np.random.randint(1, 10), dtype=dtype)
        a_vec = np.random.randint(1, 10, 3).astype(dtype)
        b_vec = np.random.randint(1, 10, 3).astype(dtype)
        a_mat = np.random.randint(1, 10, 6).reshape((3, 2)).astype(dtype)
        b_mat = np.random.randint(1, 10, 6).reshape((3, 2)).astype(dtype)
        b_mat_f = np.random.randint(1, 10, 6).reshape((3, 2)).astype(dtype,
                                                                    order='F')
    else:
        a_sca = np.random.normal(scale=5.0, size=()).astype(dtype)
        b_sca = np.random.normal(scale=5.0, size=()).astype(dtype)
        a_vec = np.random.normal(scale=5.0, size=(3,)).astype(dtype)
        b_vec = np.random.normal(scale=5.0, size=(3,)).astype(dtype)
        a_mat = np.random.normal(scale=5.0, size=(3, 2)).astype(dtype)
        b_mat = np.random.normal(scale=5.0, size=(3, 2)).astype(dtype)
        b_mat_f = np.random.normal(scale=5.0, size=(3, 2)).astype(dtype,
                                                                  order='F')

    a_sca_gpu = gpuarray.to_gpu(a_sca)
    b_sca_gpu = gpuarray.to_gpu(b_sca)
    a_vec_gpu = gpuarray.to_gpu(a_vec)
    b_vec_gpu = gpuarray.to_gpu(b_vec)
    a_mat_gpu = gpuarray.to_gpu(a_mat)
    b_mat_gpu = gpuarray.to_gpu(b_mat)
    b_mat_f_gpu = gpuarray.to_gpu(b_mat_f)

    # addition
    assert_allclose(misc.add(a_sca_gpu, b_sca_gpu).get(), a_sca + b_sca,
                    rtol=dtype_to_rtol[dtype], atol=dtype_to_atol[dtype])
    assert_allclose(misc.add(a_vec_gpu, b_vec_gpu).get(), a_vec + b_vec,
                    rtol=dtype_to_rtol[dtype], atol=dtype_to_atol[dtype])
    assert_allclose(misc.add(a_mat_gpu, b_mat_gpu).get(), a_mat + b_mat,
                    rtol=dtype_to_rtol[dtype], atol=dtype_to_atol[dtype])

    # subtraction
    assert_allclose(misc.subtract(a_sca_gpu, b_sca_gpu).get(), a_sca - b_sca,
                    rtol=dtype_to_rtol[dtype], atol=dtype_to_atol[dtype])
    assert_allclose(misc.subtract(a_vec_gpu, b_vec_gpu).get(), a_vec - b_vec,
                    rtol=dtype_to_rtol[dtype], atol=dtype_to_atol[dtype])
    assert_allclose(misc.subtract(a_mat_gpu, b_mat_gpu).get(), a_mat - b_mat,
                    rtol=dtype_to_rtol[dtype], atol=dtype_to_atol[dtype])

    # multiplication
    assert_allclose(misc.multiply(a_sca_gpu, b_sca_gpu).get(), a_sca * b_sca,
                    rtol=dtype_to_rtol[dtype], atol=dtype_to_atol[dtype])
    assert_allclose(misc.multiply(a_vec_gpu, b_vec_gpu).get(), a_vec * b_vec,
                    rtol=dtype_to_rtol[dtype], atol=dtype_to_atol[dtype])
    assert_allclose(misc.multiply(a_mat_gpu, b_mat_gpu).get(), a_mat * b_mat,
                    rtol=dtype_to_rtol[dtype], atol=dtype_to_atol[dtype])

    # division (integer dtypes floor-divide)
    if issubclass(dtype, numbers.Integral):
        assert_allclose(misc.divide(a_sca_gpu, b_sca_gpu).get(), a_sca // b_sca,
                        rtol=dtype_to_rtol[dtype], atol=dtype_to_atol[dtype])
        assert_allclose(misc.divide(a_vec_gpu, b_vec_gpu).get(), a_vec // b_vec,
                        rtol=dtype_to_rtol[dtype], atol=dtype_to_atol[dtype])
        assert_allclose(misc.divide(a_mat_gpu, b_mat_gpu).get(), a_mat // b_mat,
                        rtol=dtype_to_rtol[dtype], atol=dtype_to_atol[dtype])
    else:
        assert_allclose(misc.divide(a_sca_gpu, b_sca_gpu).get(), a_sca / b_sca,
                        rtol=dtype_to_rtol[dtype], atol=dtype_to_atol[dtype])
        assert_allclose(misc.divide(a_vec_gpu, b_vec_gpu).get(), a_vec / b_vec,
                        rtol=dtype_to_rtol[dtype], atol=dtype_to_atol[dtype])
        assert_allclose(misc.divide(a_mat_gpu, b_mat_gpu).get(), a_mat / b_mat,
                        rtol=dtype_to_rtol[dtype], atol=dtype_to_atol[dtype])

    # mismatched memory order raises
    assert_raises(ValueError, misc.add, a_mat_gpu, b_mat_f_gpu)
def almLasso_mat_fun(self):
    '''
    Augmented Lagrangian Multiplier (ADMM) method for the Lasso problem.
    The Lagrangian form of the Lasso can be expressed as:

        min { 1/2 ||Y - X*BHETA||_2^2 + lambda ||THETA||_1 }   s.t.  BHETA - THETA = 0

    When applied to this problem, the ADMM updates take the form

        BHETA^(t+1) = (X'X + rho*I)^-1 (X'Y + rho*THETA^t - mu^t)
        THETA^(t+1) = Shrinkage_{lambda/rho}(BHETA^(t+1) + mu^t/rho)
        mu^(t+1)    = mu^t + rho*(BHETA^(t+1) - THETA^(t+1))

    The algorithm involves a 'ridge regression' update for BHETA, a
    soft-thresholding (shrinkage) step for THETA and then a simple linear
    update for mu.

    NB: this ADMM version contains several variations, such as the use of
    two penalty parameters (mu1, mu2) instead of a single one.
    '''
    print('\tADMM processing...')

    alpha1 = alpha2 = 0
    if (len(self.reg_params) == 1):
        alpha1 = self.reg_params[0]
        alpha2 = self.reg_params[0]
    elif (len(self.reg_params) == 2):
        alpha1 = self.reg_params[0]
        alpha2 = self.reg_params[1]

    # threshold parameters for the stopping criteria
    if (len(self.thr) == 1):
        thr1 = self.thr[0]
        thr2 = self.thr[0]
    elif (len(self.thr) == 2):
        thr1 = self.thr[0]
        thr2 = self.thr[1]

    # entry condition
    err1 = 10 * thr1
    err2 = 10 * thr2

    start_time = time.time()

    # setting penalty parameters for the ALM
    mu1p = alpha1 * 1 / self.computeLambda()
    print("\t\t-Compute Lambda- Time = %s seconds" % (time.time() - start_time))
    mu2p = alpha2 * 1
    mu1 = mu1p
    mu2 = mu2p

    i = 1
    start_time = time.time()
    if self.GPU == True:
        # defining the penalty parameters and the constraint to minimize,
        # i.e. the lambda and C matrices respectively
        THETA = misc.zeros((self.num_columns, self.num_columns), dtype='float64')
        lambda2 = misc.zeros((self.num_columns, self.num_columns), dtype='float64')

        gpu_data = gpuarray.to_gpu(self.data)
        P_GPU = linalg.dot(gpu_data, gpu_data, transa='T')

        OP1 = P_GPU
        linalg.scale(np.float32(mu1), OP1)

        OP2 = linalg.eye(self.num_columns)
        linalg.scale(mu2, OP2)

        if self.affine == True:
            print('\t\tGPU affine...')
            OP3 = misc.ones((self.num_columns, self.num_columns), dtype='float64')
            linalg.scale(mu2, OP3)
            lambda3 = misc.zeros((1, self.num_columns), dtype='float64')

            # TODO: because of a problem with the linalg.inv version of
            # scikit-cuda, we work around it with numpy's np.linalg.inv
            A = np.linalg.inv(misc.add(misc.add(OP1.get(), OP2.get()), OP3.get()))
            A_GPU = gpuarray.to_gpu(A)

            while ((err1 > thr1 or err2 > thr1) and i < self.max_iter):
                _lambda2 = gpuarray.to_gpu(lambda2)
                _lambda3 = gpuarray.to_gpu(lambda3)

                linalg.scale(1 / mu2, _lambda2)
                term_OP2 = gpuarray.to_gpu(_lambda2.get())

                OP2 = gpuarray.to_gpu(misc.subtract(THETA, term_OP2))
                linalg.scale(mu2, OP2)

                OP4 = gpuarray.to_gpu(
                    np.matlib.repmat(_lambda3.get(), self.num_columns, 1))

                # updating Z
                BHETA = linalg.dot(
                    A_GPU, misc.add(misc.add(misc.add(OP1, OP2), OP3), OP4))

                # deallocating unnecessary GPU variables
                OP2.gpudata.free()
                OP4.gpudata.free()
                _lambda2.gpudata.free()
                _lambda3.gpudata.free()

                # updating C
                THETA = misc.add(BHETA, term_OP2)
                THETA = self.shrinkL1Lq(THETA.get(), 1 / mu2)
                THETA = THETA.astype('float64')

                # updating Lagrange multipliers
                term_lambda2 = misc.subtract(BHETA, gpuarray.to_gpu(THETA))
                linalg.scale(mu2, term_lambda2)
                term_lambda2 = gpuarray.to_gpu(term_lambda2.get())
                lambda2 = misc.add(lambda2, term_lambda2)  # on GPU

                term_lambda3 = misc.subtract(
                    misc.ones((1, self.num_columns), dtype='float64'),
                    misc.sum(BHETA, axis=0))
                linalg.scale(mu2, term_lambda3)
                term_lambda3 = gpuarray.to_gpu(term_lambda3.get())
                lambda3 = misc.add(lambda3, term_lambda3)  # on GPU

                # deallocating unnecessary GPU variables
                term_OP2.gpudata.free()
                term_lambda2.gpudata.free()
                term_lambda3.gpudata.free()

                err1 = self.errorCoef(BHETA.get(), THETA)
                err2 = self.errorCoef(np.sum(BHETA.get(), axis=0),
                                      np.ones([1, self.num_columns]))

                # deallocating unnecessary GPU variables
                BHETA.gpudata.free()

                THETA = gpuarray.to_gpu((THETA))

                # reporting errors
                if (self.verbose and (i % self.step == 0)):
                    print('\t\tIteration = %d, ||Z - C|| = %2.5e, ||1 - C^T 1|| = %2.5e'
                          % (i, err1, err2))
                i += 1

            THETA = THETA.get()
            Err = [err1, err2]
            if (self.verbose):
                print('\t\tTerminating ADMM at iteration %5.0f, \n ||Z - C|| = %2.5e, ||1 - C^T 1|| = %2.5e. \n'
                      % (i, err1, err2))
        else:
            print('\t\tGPU not affine')
            # TODO: because of a problem with the linalg.inv version of
            # scikit-cuda, we work around it with numpy's np.linalg.inv
            A = np.linalg.inv(misc.add(OP1.get(), OP2.get()))
            A_GPU = gpuarray.to_gpu(A)

            while (err1 > thr1 and i < self.max_iter):
                _lambda2 = gpuarray.to_gpu(lambda2)

                term_OP2 = THETA
                linalg.scale(mu2, term_OP2)
                term_OP2 = misc.subtract(term_OP2, _lambda2)
                OP2 = gpuarray.to_gpu(term_OP2.get())

                BHETA = linalg.dot(A_GPU, misc.add(OP1, OP2))

                linalg.scale(1 / mu2, _lambda2)
                term_THETA = gpuarray.to_gpu(_lambda2.get())

                THETA = misc.add(BHETA, term_THETA)
                THETA = self.shrinkL1Lq(THETA.get(), 1 / mu2)
                THETA = THETA.astype('float32')

                # updating Lagrange multipliers
                term_lambda2 = misc.subtract(BHETA, gpuarray.to_gpu(THETA))
                linalg.scale(mu2, term_lambda2)
                term_lambda2 = gpuarray.to_gpu(term_lambda2.get())
                lambda2 = misc.add(lambda2, term_lambda2)  # on GPU

                err1 = self.errorCoef(BHETA.get(), THETA)

                THETA = gpuarray.to_gpu((THETA))

                # reporting errors
                if (self.verbose and (i % self.step == 0)):
                    print('\t\tIteration %5.0f, ||Z - C|| = %2.5e' % (i, err1))
                i += 1

            THETA = THETA.get()
            Err = [err1, err2]
            if (self.verbose):
                print('\t\tTerminating ADMM at iteration %5.0f, \n ||Z - C|| = %2.5e'
                      % (i, err1))
    else:
        # CPU version
        # defining the penalty parameters and the constraint to minimize,
        # i.e. the lambda and C matrices respectively
        THETA = np.zeros([self.num_columns, self.num_columns])
        lambda2 = np.zeros([self.num_columns, self.num_columns])

        P = self.data.T.dot(self.data)
        OP1 = np.multiply(P, mu1)

        if self.affine == True:
            # INITIALIZATION
            lambda3 = np.zeros(self.num_columns).T

            A = np.linalg.inv(
                np.multiply(mu1, P) +
                np.multiply(mu2, np.eye(self.num_columns, dtype=int)) +
                np.multiply(mu2, np.ones([self.num_columns, self.num_columns])))

            OP3 = np.multiply(mu2, np.ones([self.num_columns, self.num_columns]))

            while ((err1 > thr1 or err2 > thr1) and i < self.max_iter):
                # updating Bheta
                OP2 = np.multiply(THETA - np.divide(lambda2, mu2), mu2)
                OP4 = np.matlib.repmat(lambda3, self.num_columns, 1)
                BHETA = A.dot(OP1 + OP2 + OP3 + OP4)

                # updating C
                THETA = BHETA + np.divide(lambda2, mu2)
                THETA = self.shrinkL1Lq(THETA, 1 / mu2)

                # updating Lagrange multipliers
                lambda2 = lambda2 + np.multiply(mu2, BHETA - THETA)
                lambda3 = lambda3 + np.multiply(
                    mu2, np.ones([1, self.num_columns]) - np.sum(BHETA, axis=0))

                err1 = self.errorCoef(BHETA, THETA)
                err2 = self.errorCoef(np.sum(BHETA, axis=0),
                                      np.ones([1, self.num_columns]))

                # mu1 = min(mu1 * (1 + 10 ^ -5), 10 ^ 2 * mu1p);
                # mu2 = min(mu2 * (1 + 10 ^ -5), 10 ^ 2 * mu2p);

                # reporting errors
                if (self.verbose and (i % self.step == 0)):
                    print('\t\tIteration = %d, ||Z - C|| = %2.5e, ||1 - C^T 1|| = %2.5e'
                          % (i, err1, err2))
                i += 1

            Err = [err1, err2]
            if (self.verbose):
                print('\t\tTerminating ADMM at iteration %5.0f, \n ||Z - C|| = %2.5e, ||1 - C^T 1|| = %2.5e. \n'
                      % (i, err1, err2))
        else:
            print('\t\tCPU not affine')
            A = np.linalg.inv(
                OP1 + np.multiply(mu2, np.eye(self.num_columns, dtype=int)))

            while (err1 > thr1 and i < self.max_iter):
                # updating Z
                OP2 = np.multiply(mu2, THETA) - lambda2
                BHETA = A.dot(OP1 + OP2)

                # updating C
                THETA = BHETA + np.divide(lambda2, mu2)
                THETA = self.shrinkL1Lq(THETA, 1 / mu2)

                # updating Lagrange multipliers
                lambda2 = lambda2 + np.multiply(mu2, BHETA - THETA)

                # computing errors
                err1 = self.errorCoef(BHETA, THETA)

                # reporting errors
                if (self.verbose and (i % self.step == 0)):
                    print('\t\tIteration %5.0f, ||Z - C|| = %2.5e' % (i, err1))
                i += 1

            Err = [err1, err2]
            if (self.verbose):
                print('\t\tTerminating ADMM at iteration %5.0f, \n ||Z - C|| = %2.5e'
                      % (i, err1))

    print("\t\t-ADMM- Time = %s seconds" % (time.time() - start_time))

    return THETA, Err
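The THETA updates above call self.shrinkL1Lq, the soft-thresholding (shrinkage) step named in the docstring. Its implementation is not shown here, so the following is only a minimal NumPy sketch of the standard elementwise L1 soft-thresholding operator, included to make that step concrete.

# Standard L1 soft-thresholding (shrinkage) operator; an assumed stand-in,
# not the author's shrinkL1Lq implementation.
import numpy as np

def soft_threshold(X, tau):
    """Elementwise solution of argmin_Z tau*||Z||_1 + 0.5*||Z - X||_F^2."""
    return np.sign(X) * np.maximum(np.abs(X) - tau, 0.0)

# example: shrink a small matrix with threshold 1/mu2 = 0.5
X = np.array([[1.5, -0.2], [-3.0, 0.7]])
print(soft_threshold(X, 0.5))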
for step in xrange(N_TIMESTEPS):
    # print step
    # Implementing the split-step method:
    # update the wavefunction and reservoir, record density
    cu_fft.fft(psi_gpu, psi_gpu, plan_forward)
    psi_gpu *= kineticFactorHalf_gpu
    cu_fft.ifft(psi_gpu, psi_gpu, plan_inverse, scale=True)

    # currentDensity_gpu = abs(psi_gpu) ** 2
    # currentDensity_gpu = psi_gpu.real ** 2 + psi_gpu.imag ** 2
    currentDensity_gpu = (psi_gpu * psi_gpu.conj()).real
    # modSquared.prepared_call(grid, block, psi_gpu.gpudata,
    #                          currentDensity_gpu.gpudata, 1024)

    # n_gpu *= cumath.exp(-gammaRdt_gpu + Rdt_gpu * currentDensity_gpu)
    n_gpu *= cumath.exp(misc.add(-gammaRdt_gpu,
                                 -misc.multiply(Rdt_gpu, currentDensity_gpu)))
    n_gpu += Pdt_gpu

    psi_gpu *= cumath.exp(
        misc.add(
            misc.add(misc.multiply(expFactorPolFirst_gpu, n_gpu),
                     misc.multiply(expFactorPolSecond_gpu, currentDensity_gpu)),
            expFactorPolThird_gpu))
    # psiNonlinear.prepared_call(grid, block, expFactorPolFirst,
    #                            expFactorPolSecond, expFactorPolThird,
    #                            psi_gpu.gpudata, n_gpu.gpudata,
    #                            currentDensity_gpu.gpudata, 1024)

    cu_fft.fft(psi_gpu, psi_gpu, plan_forward)

    # record spectrum
    drv.memcpy_dtod(spectrum[step, :].gpudata, psi_gpu[N//2, :].gpudata,
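For readers unfamiliar with the split-step scheme this loop implements on the GPU, here is a compact NumPy sketch of the same idea for a simple nonlinear Schroedinger-type equation: a half kinetic step applied in Fourier space, then a nonlinear (density-dependent) factor in real space. The grid, time step, and factors are illustrative assumptions, not the ones used above.

# Minimal 1-D split-step sketch with assumed grid, time step and factors.
import numpy as np

N, dx, dt = 256, 0.1, 0.01
x = (np.arange(N) - N // 2) * dx
k = 2 * np.pi * np.fft.fftfreq(N, d=dx)       # angular wavenumbers
psi = np.exp(-x**2)                           # initial wavefunction

kinetic_half = np.exp(-0.25j * k**2 * dt)     # half-step kinetic factor
for _ in range(100):
    psi = np.fft.ifft(kinetic_half * np.fft.fft(psi))   # half kinetic step
    psi *= np.exp(-1j * np.abs(psi)**2 * dt)            # nonlinear (density) step
    psi = np.fft.ifft(kinetic_half * np.fft.fft(psi))   # second half kinetic step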