def test_magma_fermi_matrix_mul(ctx_factory):
    """Compare a split/tagged image-based matmul kernel with its reference.

    Builds ``c[i, j] = sum(k, a[i, k]*b[k, j])`` with ``a`` and ``b`` passed
    as image arguments and ``c`` as a global array, applies a series of
    ``split_iname`` transformations, and hands both the untransformed and
    the transformed kernel to ``lp.auto_test_vs_ref``.

    :arg ctx_factory: callable returning the OpenCL context to test on.

    The test is skipped when the first device of the context has no image
    support, when it belongs to the "Portable Computing Language" platform,
    or when single-channel float 2D images are not a supported read-only
    image format.
    """
    dtype = np.float32
    ctx = ctx_factory()
    order = "C"

    device = ctx.devices[0]

    # Report the actual reason for skipping: lacking image support is not
    # the same thing as running on pocl.
    if not device.image_support:
        pytest.skip("device does not support images")
    if device.platform.name == "Portable Computing Language":
        pytest.skip("crashes on pocl")

    image_format = cl.ImageFormat(cl.channel_order.R, cl.channel_type.FLOAT)
    if image_format not in cl.get_supported_image_formats(
            ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D):
        pytest.skip("image format not supported")

    # Only determine the problem size once we know the test will run.
    n = get_suitable_size(ctx)

    knl = lp.make_kernel(
        "{[i,j,k]: 0<=i,j,k<%d}" % n,
        ["c[i, j] = sum(k, a[i, k]*b[k, j])"],
        [
            lp.ImageArg("a", dtype, shape=(n, n)),
            lp.ImageArg("b", dtype, shape=(n, n)),
            lp.GlobalArg("c", dtype, shape=(n, n), order=order),
        ],
        name="matmul")

    # Keep the untransformed kernel around as the reference.
    seq_knl = knl

    i_reg = 4
    j_reg = 4
    i_chunks = 16
    j_chunks = 16

    # i and j: outer pieces of size reg*chunks tagged g.0/g.1, then the
    # inner part is split again by reg into l.0/l.1 (outer) and ilp (inner).
    knl = lp.split_iname(knl, "i", i_reg * i_chunks, outer_tag="g.0")
    knl = lp.split_iname(knl, "i_inner", i_reg,
                         outer_tag="l.0", inner_tag="ilp")
    knl = lp.split_iname(knl, "j", j_reg * j_chunks, outer_tag="g.1")
    knl = lp.split_iname(knl, "j_inner", j_reg,
                         outer_tag="l.1", inner_tag="ilp")

    # k: split by 16, then the inner part by 8 with the outer piece unrolled.
    knl = lp.split_iname(knl, "k", 16)
    knl = lp.split_iname(knl, "k_inner", 8, outer_tag="unr")

    # FIXME
    #knl = lp.add_prefetch(knl, 'a',
    #        ["k_inner", "i_inner_inner", "i_inner_outer"],
    #        default_tag="l.auto")
    #knl = lp.add_prefetch(knl, 'b',
    #        ["k_inner", ("j_inner_inner", "j_inner_outer"),],
    #        default_tag="l.auto")

    # op_count is 2*n**3 scaled by 1e9, reported under the "GFlops" label.
    lp.auto_test_vs_ref(
        seq_knl, ctx, knl,
        op_count=[2 * n**3 / 1e9], op_label=["GFlops"],
        parameters={}, blacklist_ref_vendors="pocl")
def no_test_image_matrix_mul_ilp(ctx_factory):
    """Compare an ILP-tagged, prefetching image matmul with its reference.

    Builds ``c[i, j] = sum(k, a[i, k]*b[k, j])`` with ``a`` and ``b`` passed
    as image arguments and ``c`` as a global array, splits and tags the
    inames, adds prefetches for ``a`` and ``b``, and hands both the
    untransformed and the transformed kernel to ``lp.auto_test_vs_ref``.

    :arg ctx_factory: callable returning the OpenCL context to test on.

    The test is skipped when the first device of the context has no image
    support, when it belongs to the "Portable Computing Language" platform,
    or when single-channel float 2D images are not a supported read-only
    image format.

    NOTE(review): the ``no_test_`` prefix presumably keeps this function out
    of test collection on purpose -- confirm before renaming.
    """
    dtype = np.float32
    ctx = ctx_factory()
    order = "C"

    device = ctx.devices[0]

    # Report the actual reason for skipping: lacking image support is not
    # the same thing as running on pocl.
    if not device.image_support:
        pytest.skip("device does not support images")
    if device.platform.name == "Portable Computing Language":
        pytest.skip("crashes on pocl")

    image_format = cl.ImageFormat(cl.channel_order.R, cl.channel_type.FLOAT)
    if image_format not in cl.get_supported_image_formats(
            ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D):
        pytest.skip("image format not supported")

    n = get_suitable_size(ctx)

    knl = lp.make_kernel(
        "{[i,j,k]: 0<=i,j,k<%d}" % n,
        ["c[i, j] = sum(k, a[i, k]*b[k, j])"],
        [
            lp.ImageArg("a", dtype, shape=(n, n)),
            lp.ImageArg("b", dtype, shape=(n, n)),
            lp.GlobalArg("c", dtype, shape=(n, n), order=order),
        ],
        name="matmul")

    # Keep the untransformed kernel around as the reference.
    seq_knl = knl

    ilp = 4
    j_inner_split = 4

    # i: split by 2, tagged g.0 (outer) / l.1 (inner).
    knl = lp.split_iname(knl, "i", 2, outer_tag="g.0", inner_tag="l.1")

    # j: outer piece of size ilp*j_inner_split tagged g.1; the inner part is
    # split again into an ilp-tagged outer and an l.0-tagged inner piece.
    knl = lp.split_iname(knl, "j", ilp * j_inner_split, outer_tag="g.1")
    knl = lp.split_iname(knl, "j_inner", j_inner_split,
                         outer_tag="ilp", inner_tag="l.0")

    knl = lp.split_iname(knl, "k", 2)

    # conflict-free?
    knl = lp.add_prefetch(knl, 'a', ["i_inner", "k_inner"],
                          default_tag="l.auto")
    knl = lp.add_prefetch(knl, 'b',
                          ["j_inner_outer", "j_inner_inner", "k_inner"],
                          default_tag="l.auto")

    # op_count is 2*n**3 scaled by 1e9, reported under the "GFlops" label.
    lp.auto_test_vs_ref(
        seq_knl, ctx, knl,
        op_count=[2 * n**3 / 1e9], op_label=["GFlops"],
        parameters={})