def multMatVect(v, A, m1, B, m2):
    """Multiply the first half of `v` by `A` with a modulo of `m1` and the
    second half by `B` with a modulo of `m2`.

    Parameters
    ----------
    v : ndarray
        Vector of length six; the first three entries are multiplied by `A`
        and the last three by `B`.
    A, B : ndarray
        The two 3x3 multiplier matrices.
    m1, m2 : int
        The moduli applied to the first and second products, respectively.

    Returns
    -------
    ndarray
        The concatenation of ``A.dot(v[:3]) % m1`` and ``B.dot(v[3:]) % m2``.

    Notes
    -----
    The parameters of ``dot_modulo`` are passed implicitly because passing
    them explicitly takes more time than running the function's C-code.

    """
    if multMatVect.dot_modulo is None:
        A_sym = lmatrix("A")
        s_sym = ivector("s")
        m_sym = iscalar("m")
        A2_sym = lmatrix("A2")
        s2_sym = ivector("s2")
        m2_sym = iscalar("m2")
        o = DotModulo()(A_sym, s_sym, m_sym, A2_sym, s2_sym, m2_sym)
        multMatVect.dot_modulo = function(
            [A_sym, s_sym, m_sym, A2_sym, s2_sym, m2_sym], o, profile=False
        )

    # This way of calling the Aesara function is done to bypass its overhead.
    f = multMatVect.dot_modulo
    f.input_storage[0].storage[0] = A
    f.input_storage[1].storage[0] = v[:3]
    f.input_storage[2].storage[0] = m1
    f.input_storage[3].storage[0] = B
    f.input_storage[4].storage[0] = v[3:]
    f.input_storage[5].storage[0] = m2
    f.fn()
    r = f.output_storage[0].storage[0]

    return r


# The compiled graph is cached on the function object; it must start out
# unset so the first call builds it.
multMatVect.dot_modulo = None
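
# A minimal usage sketch (the state values are illustrative, and it assumes
# the MRG31k3p constants A1p72, A2p72, M1 and M2 defined in this module):
# step a six-element stream state forward by 2**72 draws.
#
#   state = np.asarray([1, 2, 3, 4, 5, 6], dtype="int32")
#   new_state = multMatVect(state, A1p72, M1, A2p72, M2)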
def test_multMatVect():
    A1 = lmatrix("A1")
    s1 = ivector("s1")
    m1 = iscalar("m1")
    A2 = lmatrix("A2")
    s2 = ivector("s2")
    m2 = iscalar("m2")

    g0 = rng_mrg.DotModulo()(A1, s1, m1, A2, s2, m2)
    f0 = function([A1, s1, m1, A2, s2, m2], g0)

    i32max = np.iinfo(np.int32).max
    rng = np.random.default_rng(utt.fetch_seed())

    A1 = rng.integers(0, i32max, (3, 3)).astype("int64")
    s1 = rng.integers(0, i32max, 3).astype("int32")
    m1 = np.asarray(rng.integers(i32max), dtype="int32")
    A2 = rng.integers(0, i32max, (3, 3)).astype("int64")
    s2 = rng.integers(0, i32max, 3).astype("int32")
    m2 = np.asarray(rng.integers(i32max), dtype="int32")

    f0.input_storage[0].storage[0] = A1
    f0.input_storage[1].storage[0] = s1
    f0.input_storage[2].storage[0] = m1
    f0.input_storage[3].storage[0] = A2
    f0.input_storage[4].storage[0] = s2
    f0.input_storage[5].storage[0] = m2

    r_a1 = rng_mrg.matVecModM(A1, s1, m1)
    r_a2 = rng_mrg.matVecModM(A2, s2, m2)
    f0.vm()
    r_b = f0.output_storage[0].value

    assert np.allclose(r_a1, r_b[:3])
    assert np.allclose(r_a2, r_b[3:])
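
# A naive NumPy cross-check of what DotModulo computes on each half
# (hypothetical helper, not part of the test suite): a plain mat-vec
# product reduced modulo m. Python-object arithmetic sidesteps the int64
# overflow that matVecModM works around with its decomposition.
def naive_mat_vec_mod(A, s, m):
    return np.asarray(A.astype(object).dot(s) % int(m), dtype="int64")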
def test_correct_solution(self):
    x = lmatrix()
    y = lmatrix()
    z = lscalar()
    b = aesara.tensor.nlinalg.lstsq()(x, y, z)
    f = function([x, y, z], b)

    TestMatrix1 = np.asarray([[2, 1], [3, 4]])
    TestMatrix2 = np.asarray([[17, 20], [43, 50]])
    TestScalar = np.asarray(1)
    m = f(TestMatrix1, TestMatrix2, TestScalar)

    assert np.allclose(TestMatrix2, np.dot(TestMatrix1, m[0]))
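
# For reference, the system above has an exact solution: TestMatrix1 is
# invertible and TestMatrix1 @ [[5, 6], [7, 8]] == TestMatrix2, so m[0]
# should equal [[5, 6], [7, 8]]. A hedged cross-check, assuming the Op
# forwards to np.linalg.lstsq with the scalar input as rcond:
#
#   m_np = np.linalg.lstsq(TestMatrix1, TestMatrix2, rcond=1)
#   assert np.allclose(m[0], m_np[0])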
def test_blocksparse_inplace_gemv_opt():
    b = fmatrix()
    W = ftensor4()
    h = ftensor3()
    iIdx = lmatrix()
    oIdx = lmatrix()

    o = sparse_block_dot(W, h, iIdx, b, oIdx)

    f = aesara.function([W, h, iIdx, b, oIdx], o)

    if aesara.config.mode == "FAST_COMPILE":
        assert not f.maker.fgraph.toposort()[-1].op.inplace
        assert check_stack_trace(f, ops_to_check=[sparse_block_gemv])
    else:
        assert f.maker.fgraph.toposort()[-1].op.inplace
        assert check_stack_trace(f, ops_to_check=[sparse_block_gemv_inplace])
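
# A pure-NumPy reference for sparse_block_dot's semantics (the helper name
# is ours; shapes follow the op's documented layout): each output window
# position accumulates dot products of the selected input and weight
# blocks on top of the gathered bias rows.
def sparse_block_dot_ref(W, h, iIdx, b, oIdx):
    # W: (iBlocks, oBlocks, iSize, oSize); h: (batch, iWin, iSize);
    # iIdx: (batch, iWin); b: (oBlocks, oSize); oIdx: (batch, oWin).
    o = b.take(oIdx, axis=0)
    for bb in range(h.shape[0]):
        for j in range(oIdx.shape[1]):
            for i in range(iIdx.shape[1]):
                o[bb, j] += np.dot(h[bb, i], W[iIdx[bb, i], oIdx[bb, j]])
    return o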
def test_blocksparse_inplace_outer_opt():
    b = fmatrix()
    W = ftensor4()
    h = ftensor3()
    iIdx = lmatrix()
    oIdx = lmatrix()

    o = sparse_block_dot(W, h, iIdx, b, oIdx)

    f = aesara.function(
        [W, h, iIdx, b, oIdx], [o, aesara.gradient.grad(o.sum(), wrt=W)]
    )

    if aesara.config.mode == "FAST_COMPILE":
        assert not f.maker.fgraph.toposort()[-1].op.inplace
        assert check_stack_trace(f, ops_to_check=sparse_block_outer)
    else:
        assert f.maker.fgraph.toposort()[-1].op.inplace
        assert check_stack_trace(f, ops_to_check=sparse_block_outer_inplace)
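
# The gradient w.r.t. W is a blockwise outer product; a hedged NumPy
# sketch of the sparse_block_outer accumulation (the helper name and the
# go argument, the gradient flowing into o, are ours):
def sparse_block_outer_ref(h, go, iIdx, oIdx, W_shape):
    gW = np.zeros(W_shape, dtype=h.dtype)
    for bb in range(h.shape[0]):
        for i in range(iIdx.shape[1]):
            for j in range(oIdx.shape[1]):
                gW[iIdx[bb, i], oIdx[bb, j]] += np.outer(h[bb, i], go[bb, j])
    return gW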
def test_blocksparse_grad_merge(self):
    b = fmatrix()
    h = ftensor3()
    iIdx = lmatrix()
    oIdx = lmatrix()

    W_val, h_val, iIdx_val, b_val, oIdx_val = self.gemv_data()
    W = gpuarray_shared_constructor(W_val, context=test_ctx_name)

    o = gpu_sparse_block_gemv(b.take(oIdx, axis=0), W, h, iIdx, oIdx)
    gW = aesara.grad(o.sum(), W)

    lr = np.asarray(0.05, dtype="float32")
    upd = W - lr * gW

    f1 = aesara.function([h, iIdx, b, oIdx], updates=[(W, upd)], mode=mode_with_gpu)

    # Make sure the lr update was merged.
    assert isinstance(f1.maker.fgraph.outputs[0].owner.op, GpuSparseBlockOuter)

    # Exclude the merge optimizations.
    mode = mode_with_gpu.excluding("local_merge_blocksparse_alpha")
    mode = mode.excluding("local_merge_blocksparse_output")
    f2 = aesara.function([h, iIdx, b, oIdx], updates=[(W, upd)], mode=mode)

    # Make sure the lr update is not merged.
    assert not isinstance(f2.maker.fgraph.outputs[0].owner.op, GpuSparseBlockOuter)

    f2(h_val, iIdx_val, b_val, oIdx_val)
    W_ref = W.get_value()

    # Reset the var.
    W.set_value(W_val)
    f1(h_val, iIdx_val, b_val, oIdx_val)
    W_opt = W.get_value()

    utt.assert_allclose(W_ref, W_opt)
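
# Informally, what the merge rewrites buy (our reading, not asserted by
# the test): the unmerged graph materializes gW with a sparse-block outer
# product and then computes W - lr * gW as a second pass, while the merged
# graph folds -lr into the outer op's alpha so one GpuSparseBlockOuter
# call accumulates the scaled update into W. In terms of the sketch above:
#
#   gW_np = sparse_block_outer_ref(h_val, go_val, iIdx_val, oIdx_val, W_val.shape)
#   W_new = W_val - lr * gW_np   # unmerged: two passes; merged: one call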