def softmax_test():
    """Check that gpu.softmax matches a NumPy reference implementation.

    Builds a random float32 matrix, runs it through the GPU softmax,
    copies the result back to the host, and asserts it agrees with a
    numerically stable CPU softmax to 3 decimal places.
    """
    def softmax(X):
        """Numerically stable row-wise softmax of a 2-D array.

        Subtracting each row's max before exponentiating keeps exp()
        from overflowing without changing the result.
        """
        # keepdims=True keeps the reduced axis so the subtraction and
        # division broadcast row-wise (replaces the deprecated
        # np.matrix(...).T construction).
        max_row_values = np.max(X, axis=1, keepdims=True)
        result = np.exp(X - max_row_values)
        sums = np.sum(result, axis=1, keepdims=True)
        return result / sums

    # Odd, non-square shape to catch row/column mix-ups in the kernel.
    A = np.float32(np.random.randn(17, 83))
    B = gpu.array(A)
    C = gpu.softmax(B).tocpu()
    t.assert_almost_equal(C, softmax(A), 3, "Softmax not working")
# NOTE(review): this is a fragment of a larger benchmark routine — the names
# A, B, C, dim1, dim_inner, dim_outer, iters, t (timer/test helper) and gpu
# come from the enclosing scope, which is not visible here.

# Warm-up: two untimed matmuls so kernel compilation / cache effects do not
# pollute the timed loop below.
for j in range(2): gpu.dot(A, B, C)
t.tick(str(dim_inner))
# Timed GEMM loop.
for j in range(iters): gpu.dot(A, B, C)
sec = t.tock(str(dim_inner)) / 1000.  # timer presumably reports ms — TODO confirm
# Estimate memory operations from the kernel's tiling scheme: tiles appear to
# be 16x64, with the "+ (1 if ... % 64 > 0 else 0)" terms rounding partial
# tiles up.  NOTE(review): relies on Python 2 integer division for the
# dim / 16 and dim / 64 terms — would change meaning under Python 3.
tilesA = (dim1 / 16) * ((dim_inner / 64) + (1 if dim_inner % 64 > 0 else 0))
tilesB = ((dim_inner / 64) + (1 if dim_inner % 64 > 0 else 0)) * ( (dim_inner / 16) * ((dim_outer / 64) + (1 if dim_outer % 64 > 0 else 0)))
# Loads for both tile sets plus one store per output element.
memops = (tilesA + tilesB) * 16 * 64 + (dim_inner * dim_outer)
#print sec / (memops*iters)
#print (memops/sec)*4*(1024**-3)*iters
#print iters*(dim**3)/(sec*1000*1000*1000)
#print iters*(dim1*dim_inner*dim_outer)/(sec*1000*1000*1000)
# Print a derived throughput figure next to the measured seconds.
# NOTE(review): 6144 and 24 look like hardware-specific constants (peak-rate
# normalization?) — meaning not derivable from this fragment; confirm.
print iters * dim1 * dim_inner * dim_outer / (6144. * 1000 * 1000 * 1000) * 24, sec
# Second benchmark: element-wise / softmax kernels on fresh random buffers.
A2 = gpu.rand(dim1, dim_inner)
C2 = gpu.rand(dim1, dim_inner)
v = gpu.rand(dim1, 1)  # column vector, used only by the commented vector_add path
t.tick("add " + str(dim_inner))
for j in range(iters):
    # Alternative kernels left in place for quick swapping while profiling:
    #gpu.add(A,A2, C2)
    #gpu.vector_add(A, v, C2)
    gpu.softmax(A, A2)
sec = t.tock("add " + str(dim_inner)) / 1000.