# Tail of a makeTester(...) cases dict whose opening call is above this chunk.
# Each case is [z, alpha, x, y, beta] for the gemm contract alpha*dot(x, y) + beta*z;
# test5-test9 vary alpha/beta around the special values 0.0 and -1.0.
test5=[rand(3, 4), 0.0, rand(3, 5), rand(5, 4), 0.6],
test6=[rand(3, 4), 0.0, rand(3, 5), rand(5, 4), -1.0],
test7=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 0.0],
test8=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 1.1],
test9=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 1.1],
# Disabled cases exercising zero-sized dimensions (empty matrices);
# NOTE(review): left commented out in the original — presumably unsupported
# or known-failing on the GPU path; confirm before enabling.
# test10=[rand(0, 4), -1.0, rand(0, 5), rand(5, 4), 0.0],
# test11=[rand(3, 0), -1.0, rand(3, 5), rand(5, 0), 1.1],
# test12=[rand(3, 4), -1.0, rand(3, 0), rand(0, 4), -1.1],
# test13=[rand(0, 0), -1.0, rand(0, 0), rand(0, 0), -1.1],
)
)

# Checks the batched-gemm GPU op against the reference expression
# alpha * batched_dot(x, y) + beta * z.  Cases are built for every strictly
# increasing 4-tuple (b, m, k, n) drawn from [2, 3, 5, 7, 11, 13]
# (itertools.combinations), i.e. batch b of (m, k) @ (k, n) -> (m, n).
GpuGemmBatchTester = makeTester(
    'GpuGemmBatchTester',
    op=lambda z, alpha, x, y, beta: alpha * batched_dot(x, y) + beta * z,
    gpu_op=gpugemmbatch_no_inplace,
    cases=dict(
        ("test_b%im%ik%in%i" % (b, m, k, n),
         [rand(b, m, n), rand(), rand(b, m, k), rand(b, k, n), rand()])
        for b, m, k, n in itertools.combinations([2, 3, 5, 7, 11, 13], 4)))


class TestGpuSger(TestGer):
    # Re-runs the test methods inherited from TestGer (defined elsewhere)
    # in GPU mode with float32 symbolic inputs for the rank-1 update (sger).
    def setUp(self):
        self.mode = mode_with_gpu
        # Single dtype is enough: the optimization isn't dtype-dependent.
        dtype = self.dtype = 'float32'  # optimization isn't dtype-dependent
        # Symbolic operands for A + a * outer(x, y): matrix, scalar, two vectors.
        self.A = tensor.tensor(dtype=dtype, broadcastable=(False, False))
        self.a = tensor.tensor(dtype=dtype, broadcastable=())
        self.x = tensor.tensor(dtype=dtype, broadcastable=(False,))
        self.y = tensor.tensor(dtype=dtype, broadcastable=(False,))
# NOTE(review): the line below is a truncated byte-duplicate of the block
# above (it starts mid-expression inside test6's case list and breaks off
# mid-setUp) — almost certainly a paste/extraction artifact. Confirm against
# the upstream file and remove the duplicate; kept verbatim here.
rand(5, 4), -1.0], test7=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 0.0], test8=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 1.1], test9=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), -1.1], # test10=[rand(0, 4), -1.0, rand(0, 5), rand(5, 4), 0.0], # test11=[rand(3, 0), -1.0, rand(3, 5), rand(5, 0), 1.1], # test12=[rand(3, 4), -1.0, rand(3, 0), rand(0, 4), -1.1], # test13=[rand(0, 0), -1.0, rand(0, 0), rand(0, 0), -1.1], )) GpuGemmBatchTester = makeTester( 'GpuGemmBatchTester', op=lambda z, alpha, x, y, beta: alpha * batched_dot(x, y) + beta * z, gpu_op=gpugemmbatch_no_inplace, cases=dict( ("test_b%im%ik%in%i" % (b, m, k, n), [rand(b, m, n), rand(), rand(b, m, k), rand(b, k, n), rand()]) for b, m, k, n in itertools.combinations([2, 3, 5, 7, 11, 13], 4))) class TestGpuSger(TestGer): def setUp(self): self.mode = mode_with_gpu dtype = self.dtype = 'float32' # optimization isn't dtype-dependent self.A = tensor.tensor(dtype=dtype, broadcastable=(False, False))