def _shared_nd_test(bit, low_bit_type):
    """Round-trip evenly spaced low-bit values through ``as_raw_tensor``.

    The values span ``1 - 2**bit`` to ``2**bit - 1`` and are generated twice,
    once with a stride of 2 and once with a stride of 4. Each array is turned
    into a raw tensor on ``"xpux"`` and read back with ``numpy()``; the result
    must match the plain Python ``range`` with the same bounds and stride.

    :param bit: bit width used to derive the value bounds
    :param low_bit_type: dtype passed to both ``np.arange`` and
        ``as_raw_tensor``
    """
    upper = (1 << bit) - 1
    lower = 1 - (1 << bit)
    stop = upper + 2  # exclusive bound, so ``upper`` itself can be reached

    for stride in (2, 4):
        host = np.arange(lower, stop, stride, dtype=low_bit_type)
        tensor = as_raw_tensor(host, dtype=low_bit_type, device="xpux")
        np.testing.assert_allclose(tensor.numpy(), range(lower, stop, stride))
def test_shared_nd():
    """Check that bfloat16 data survives a trip through ``as_raw_tensor``.

    Each case converts a list of floats to a ``bfloat16`` array, wraps it in
    a raw tensor on ``"xpux"`` and compares ``numpy()`` against the expected
    bfloat16-rounded values. The dtype of the read-back array is asserted for
    the first case only.
    """
    # (input values, expected read-back values, whether to assert the dtype)
    cases = (
        (
            [-3.4, 1.394683, 2.323497, -7.439948, -5.2397],
            [-3.40625, 1.398438, 2.328125, -7.4375, -5.25],
            True,
        ),
        (
            [-9.34964, -8.342, 9.4385, 0.18746, 1.48],
            [-9.375, -8.3125, 9.4375, 0.1875, 1.476562],
            False,
        ),
    )

    for values, expected, check_dtype in cases:
        host = np.array(values, dtype=bfloat16)
        tensor = as_raw_tensor(host, dtype=bfloat16, device="xpux")
        if check_dtype:
            assert tensor.numpy().dtype == bfloat16
        np.testing.assert_allclose(tensor.numpy(), expected, atol=1e-6)
def test_trace():
    """Compare a traced NEGATE function against its ``__wrapped__`` version.

    For ``symbolic`` set to False and then True, the function is evaluated
    once through ``f.__wrapped__`` (presumably the undecorated function) to
    get a reference value, then called three times through the ``trace``
    wrapper; every traced result must equal the reference.
    """
    for symbolic in (False, True):

        @trace(symbolic=symbolic)
        def f(x):
            (negated,) = apply(ops.Elemwise(Elemwise.Mode.NEGATE), x)
            return negated

        host_input = as_raw_tensor([1]).numpy()
        expected = f.__wrapped__(as_raw_tensor(host_input)).numpy()

        for _ in range(3):
            traced_out = f(as_raw_tensor(host_input)).numpy()
            np.testing.assert_equal(traced_out, expected)
def test_batched_mesh_indexing():
    """Check the batched mesh-indexing read, set and increment operations.

    A ``(2, 3, 4)`` int32 array is indexed with a full slice on axis 0, the
    rows ``(0, 2)`` for each batch, and per-batch column tuples ``s``. The
    three operator results are compared with references built element by
    element in NumPy.
    """
    x = np.arange(24).reshape(2, 3, 4).astype("int32")
    d = np.arange(12).reshape(2, 2, 3).astype("int32")
    xx = as_raw_tensor(x)
    s = [(0, 1, 2), (1, 2, 3)]
    rows = (0, 2)

    def make_index():
        # Build a fresh index for each call; it contains a list.
        return (slice(None, None, None), [rows] * 2, s)

    def positions():
        # Yields (batch, out_row, src_row, out_col, src_col) for every
        # element selected by the index.
        for batch in range(2):
            for out_row, src_row in enumerate(rows):
                for out_col, src_col in enumerate(s[batch]):
                    yield batch, out_row, src_row, out_col, src_col

    (got_read,) = batched_mesh_indexing(xx, make_index())
    (got_set,) = batched_set_mesh_indexing(xx, d, make_index())
    (got_incr,) = batched_incr_mesh_indexing(xx, d, make_index())

    # Read: gather the selected elements into a (2, 2, 3) array.
    expected = np.ndarray(shape=(2, 2, 3), dtype="int32")
    for batch, out_row, src_row, out_col, src_col in positions():
        expected[batch, out_row, out_col] = x[batch, src_row, src_col]
    np.testing.assert_equal(expected, got_read.numpy())

    # Set: overwrite the selected elements with values from ``d``.
    expected = x.copy()
    for batch, out_row, src_row, out_col, src_col in positions():
        expected[batch, src_row, src_col] = d[batch, out_row, out_col]
    np.testing.assert_equal(expected, got_set.numpy())

    # Increment: add values from ``d`` onto the selected elements.
    expected = x.copy()
    for batch, out_row, src_row, out_col, src_col in positions():
        expected[batch, src_row, src_col] += d[batch, out_row, out_col]
    np.testing.assert_equal(expected, got_incr.numpy())
def eval_partial(inp, oup):
    """Evaluate ``oup`` given values for its ``Host2DeviceCopy`` dependencies.

    The dependency vars are collected with ``cgtools.get_dep_vars``. When
    ``mge_version <= "0.6.0"`` the owner graph of the first output is
    compiled with those inputs and called on ``inp`` directly. Otherwise each
    dependency var is replaced by a fresh ``G.InputNode``, the outputs are
    wrapped in ``G.OutputNode``, the graph is compiled and executed, and the
    output values are read back as NumPy arrays.

    :param inp: input value, or list/tuple of input values, paired
        positionally with the dependency vars
    :param oup: output var, or list/tuple of output vars
    :return: whatever the compiled function returns on the old code path; a
        list of NumPy arrays (one per output) on the new code path
    """
    if not isinstance(oup, (list, tuple)):
        oup = (oup,)
    inputs = cgtools.get_dep_vars(oup, "Host2DeviceCopy")

    # NOTE(review): this compares version strings lexicographically, so e.g.
    # "0.10.0" <= "0.6.0" is True. Confirm the format of ``mge_version``
    # before relying on this for two-digit components.
    if mge_version <= "0.6.0":
        cg = oup[0].owner_graph
        outputs = list(map(mgb.copy_output, oup))
        f = cg.compile(inputs, outputs)
        return f(inp)

    if not isinstance(inp, (list, tuple)):
        inp = (inp,)

    replace_dict = {}
    inp_node_list = []
    for var in inputs:
        # Each replacement node takes the dtype of the var it stands in for.
        # Reusing the first input's dtype for every node mis-typed any input
        # whose dtype differed from the first.
        inp_node = G.InputNode(device="xpux", dtype=var.dtype, graph=var.graph)
        replace_dict[var] = inp_node.outputs[0]
        inp_node_list.append(inp_node)

    new_out = cgtools.replace_vars(oup, replace_dict)
    out_node_list = [G.OutputNode(out_var) for out_var in new_out]
    new_out_list = [out_node.outputs[0] for out_node in out_node_list]
    cg = new_out_list[0].graph
    func = cg.compile(new_out_list)

    # Feed values positionally; zip stops at the shorter of the two sequences.
    for node, value in zip(inp_node_list, inp):
        node.set_value(as_raw_tensor(value)._dev_tensor())
    func.execute()
    return [out_node.get_value().numpy() for out_node in out_node_list]
def test_as_raw_tensor_from_int64():
    """Check that ``as_raw_tensor`` converts int64 data to float32.

    An int64 array is wrapped with ``dtype="float32"`` on ``"xpux"``. The
    tensor must report that dtype and device, and ``F.add(tensor, 1)`` must
    match the NumPy result.
    """
    host = np.arange(6, dtype="int64").reshape(2, 3)
    tensor = as_raw_tensor(host, dtype="float32", device="xpux")
    incremented = F.add(tensor, 1).numpy()

    assert tensor.dtype == np.float32
    assert tensor.device == "xpux"

    expected = host.astype("float32") + 1
    np.testing.assert_almost_equal(incremented, expected)
def test_mesh_indexing():
    """Check the mesh-indexing read, set and increment operations.

    A ``(5, 5)`` int32 array is indexed with rows ``0:5:2`` and columns
    ``(1, 3)``. The three operator results are compared with references
    built element by element in NumPy.
    """
    x = np.arange(25).reshape(5, 5).astype("int32")
    d = np.arange(6).reshape(3, 2).astype("int32")
    xx = as_raw_tensor(x)
    cols = (1, 3)
    index = (slice(0, 5, 2), cols)

    def positions():
        # Yields (out_row, src_row, out_col, src_col) for every selected
        # element.
        for out_row, src_row in enumerate(range(0, 5, 2)):
            for out_col, src_col in enumerate(cols):
                yield out_row, src_row, out_col, src_col

    (got_read,) = mesh_indexing(xx, index)
    (got_set,) = set_mesh_indexing(xx, d, index)
    (got_incr,) = incr_mesh_indexing(xx, d, index)

    # Read: gather the selected elements into a (3, 2) array.
    expected = np.ndarray(shape=(3, 2), dtype="int32")
    for out_row, src_row, out_col, src_col in positions():
        expected[out_row, out_col] = x[src_row, src_col]
    np.testing.assert_equal(expected, got_read.numpy())

    # Set: overwrite the selected elements with values from ``d``.
    expected = x.copy()
    for out_row, src_row, out_col, src_col in positions():
        expected[src_row, src_col] = d[out_row, out_col]
    np.testing.assert_equal(expected, got_set.numpy())

    # Increment: add values from ``d`` onto the selected elements.
    expected = x.copy()
    for out_row, src_row, out_col, src_col in positions():
        expected[src_row, src_col] += d[out_row, out_col]
    np.testing.assert_equal(expected, got_incr.numpy())
def test_trace_profiler():
    """Check that a traced function with ``profiling=True`` yields a profile.

    For ``symbolic`` set to False and then True, the function runs once via
    ``f.__wrapped__`` and twice through the trace wrapper. After that,
    ``f.get_profile()`` must contain a truthy ``"profiler"`` entry.
    """
    for symbolic in (False, True):

        @trace(symbolic=symbolic, profiling=True)
        def f(x):
            (negated,) = apply(ops.Elemwise(Elemwise.Mode.NEGATE), x)
            return negated

        host_input = as_raw_tensor([1]).numpy()
        # The result of this call is not compared with anything; the call is
        # kept so the sequence of executions stays the same.
        f.__wrapped__(as_raw_tensor(host_input)).numpy()

        for _ in range(2):  # XXX: has to run twice
            f(as_raw_tensor(host_input))

        profile = f.get_profile()
        assert profile.get("profiler")
def test_exclude_from_trace():
    """Check that code under ``exclude_from_trace`` may vary between calls.

    The traced function negates its input, conditionally negates again
    inside an ``exclude_from_trace()`` block (depending on the loop counter
    of the enclosing test), then negates once more. For each of three
    iterations the traced result must equal the ``f.__wrapped__`` result.
    """
    for symbolic in (False, True):

        @trace(symbolic=symbolic)
        def f(x):
            negate = ops.Elemwise(Elemwise.Mode.NEGATE)
            (x,) = apply(negate, x)
            with exclude_from_trace():
                # ``step`` is read from the enclosing loop further down, so
                # this branch differs from one call to the next.
                if step % 2:
                    (x,) = apply(negate, x)
            (x,) = apply(negate, x)
            return x

        host_input = as_raw_tensor([1]).numpy()

        for step in range(3):
            expected = f.__wrapped__(as_raw_tensor(host_input)).numpy()
            actual = f(as_raw_tensor(host_input)).numpy()
            np.testing.assert_equal(actual, expected)
def _get_compiled_result(inp, dtype, shape, device, calc_func=None): graph = G.Graph() # graph.options.async_exec_level = 0b100 inp_node = G.InputNode(device=device, dtype=dtype, shape=shape, graph=graph) temp_rst = calc_func(inp_node.outputs[0]) oup_node = G.OutputNode(temp_rst) func = graph.compile(oup_node.outputs[0]) inp_node.set_value(as_raw_tensor(inp, dtype=dtype, device=device)._dev_tensor()) func.execute() return oup_node.get_value().numpy()
def test_raw_tensor():
    """Check element-wise MUL on a raw tensor, applied twice.

    A random float32 vector is multiplied by itself with the ``Elemwise``
    MUL op. The same application is done two times and each result is
    compared with ``x * x`` computed in NumPy.
    """
    from megengine.core.ops.builtin import Elemwise
    from megengine.core.tensor.raw_tensor import as_raw_tensor

    host = np.random.rand(10).astype("float32")
    tensor = as_raw_tensor(host)

    for _ in range(2):
        (product,) = apply(Elemwise(Elemwise.Mode.MUL), tensor, tensor)
        np.testing.assert_allclose(host * host, product.numpy())
def test_capture_dump():
    """Dump a traced function that closes over a tensor and re-run the dump.

    The traced function multiplies its argument by a tensor captured from
    the enclosing scope. After three traced calls that must match the
    ``f.__wrapped__`` result, the function is dumped to an in-memory buffer
    and evaluated with ``cgtools.load_and_inference``; the first result must
    match as well.
    """
    multiplier = as_raw_tensor([2])

    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        (product,) = apply(ops.Elemwise(Elemwise.Mode.MUL), x, multiplier)
        return product

    host_input = as_raw_tensor([3]).numpy()
    expected = f.__wrapped__(as_raw_tensor(host_input)).numpy()

    for _ in range(3):
        traced_out = f(as_raw_tensor(host_input)).numpy()
        np.testing.assert_equal(traced_out, expected)

    buffer = io.BytesIO()
    f.dump(buffer)
    buffer.seek(0)  # rewind so the loader reads from the start
    loaded_results = cgtools.load_and_inference(buffer, [host_input])
    np.testing.assert_equal(loaded_results[0], expected)
def test_print_in_trace():
    """Check that reading a value inside a traced function is reproducible.

    The traced function stores ``x.numpy()`` into a variable of the
    enclosing test between two negations. On each of three iterations the
    value captured by the ``f.__wrapped__`` call is compared with the value
    captured by the traced call, and the two return values are compared too.
    """
    for symbolic in (False,):  # cannot read value in symbolic mode

        @trace(symbolic=symbolic)
        def f(x):
            nonlocal captured
            negate = ops.Elemwise(Elemwise.Mode.NEGATE)
            (x,) = apply(negate, x)
            captured = x.numpy()
            (x,) = apply(negate, x)
            return x

        captured = None
        host_input = as_raw_tensor([1]).numpy()

        for _ in range(3):
            expected = f.__wrapped__(as_raw_tensor(host_input)).numpy()
            # Keep what the untraced call captured and reset the slot, so
            # the traced call below has to fill it again.
            reference_capture, captured = captured, None
            np.testing.assert_equal(f(as_raw_tensor(host_input)).numpy(), expected)
            np.testing.assert_equal(reference_capture, captured)
def test_dump_volatile():
    """Check how a captured tensor appears in a non-optimized dump.

    The traced function multiplies its argument by a tensor captured from
    the enclosing scope. After three traced calls that must match the
    ``f.__wrapped__`` result, the function is dumped with
    ``optimize_for_inference=False`` and loaded back. The second input of
    the single output's owner operator must have type ``"ImmutableTensor"``.
    """
    multiplier = as_raw_tensor([2])

    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        (product,) = apply(ops.Elemwise(Elemwise.Mode.MUL), x, multiplier)
        return product

    host_input = as_raw_tensor([3]).numpy()
    expected = f.__wrapped__(as_raw_tensor(host_input)).numpy()

    for _ in range(3):
        traced_out = f(as_raw_tensor(host_input)).numpy()
        np.testing.assert_equal(traced_out, expected)

    buffer = io.BytesIO()
    f.dump(buffer, optimize_for_inference=False)
    buffer.seek(0)  # rewind so the loader reads from the start

    _graph, _, outputs = G.load_graph(buffer)
    (out,) = outputs
    second_input = cgtools.get_owner_opr_inputs(out)[1]
    assert cgtools.get_owner_opr_type(second_input) == "ImmutableTensor"
def test_advance_indexing():
    """Check the advanced-indexing read, set and increment operations.

    Rows ``(0, 4, 2)`` of a ``(5, 5)`` int32 array are selected with a full
    slice on the second axis. Each operator result is compared with the
    equivalent NumPy fancy-indexing expression.
    """
    x = np.arange(25).reshape(5, 5).astype("int32")
    d = np.arange(15).reshape(3, 5).astype("int32")
    xx = as_raw_tensor(x)
    rows = (0, 4, 2)
    index = (rows, slice(None, None, None))

    (got_read,) = advance_indexing(xx, index)
    (got_set,) = set_advance_indexing(xx, d, index)
    (got_incr,) = incr_advance_indexing(xx, d, index)

    np.testing.assert_equal(x[rows, :], got_read.numpy())

    expected_set = x.copy()
    expected_set[rows, :] = d
    np.testing.assert_equal(expected_set, got_set.numpy())

    expected_incr = x.copy()
    expected_incr[rows, :] += d
    np.testing.assert_equal(expected_incr, got_incr.numpy())
def test_subtensor():
    """Check the subtensor read, set and increment operations.

    The index ``(slice(0, 4, 2), 3)`` is applied to a ``(5, 5)`` int32 array.
    Each operator result is compared with the equivalent NumPy expression on
    ``x[0:4:2, 3]``.
    """
    x = np.arange(25).reshape(5, 5).astype("int32")
    d = np.arange(2).astype("int32")
    xx = as_raw_tensor(x)
    index = (slice(0, 4, 2), 3)

    (got_read,) = subtensor(xx, index)
    (got_set,) = set_subtensor(xx, d, index)
    (got_incr,) = incr_subtensor(xx, d, index)

    np.testing.assert_equal(x[0:4:2, 3], got_read.numpy())

    expected_set = x.copy()
    expected_set[0:4:2, 3] = d
    np.testing.assert_equal(expected_set, got_set.numpy())

    expected_incr = x.copy()
    expected_incr[0:4:2, 3] += d
    np.testing.assert_equal(expected_incr, got_incr.numpy())
def canonize_inputs(inputs, *, config):
    """Normalize ``inputs`` into a list whose elements are all ``RawTensor``.

    A list/tuple that holds exactly one list/tuple is unwrapped first. A bare
    ``RawTensor`` is returned as a one-element list. Otherwise the elements
    are copied into a new list, and any element that is not already a
    ``RawTensor`` is converted with ``as_raw_tensor``.

    :param inputs: a ``RawTensor``, or an iterable of tensors and plain
        values
    :param config: object with a ``comp_node`` attribute; it is only
        referenced by a fallback that this function never calls
    :return: list of converted vars
    """
    is_wrapped_sequence = (
        isinstance(inputs, (list, tuple))
        and len(inputs) == 1
        and isinstance(inputs[0], (list, tuple))
    )
    if is_wrapped_sequence:
        # handle the case when a list is passed to a function with
        # variable-length argument (e.g. concat has signature concat(*inputs)
        # and is called with concat([a, b]))
        inputs = inputs[0]

    if isinstance(inputs, RawTensor):
        return [inputs]

    collected = []
    get_comp_node = None
    all_raw = True
    for item in inputs:
        if isinstance(item, RawTensor):
            # Evaluate the comp node now and bind it as a default argument.
            comp_node = item.device.to_c()
            get_comp_node = lambda cn=comp_node: cn
        else:
            all_raw = False
        collected.append(item)

    if all_raw:
        return collected

    # NOTE(review): ``get_comp_node`` is prepared here but nothing below
    # calls it, so the conversions do not receive a device. Confirm whether
    # the comp node was meant to be passed to ``as_raw_tensor``.
    if get_comp_node is None:

        def get_comp_node():
            return config.comp_node

    return [
        item if isinstance(item, RawTensor) else as_raw_tensor(item)
        for item in collected
    ]
def test_dump():
    """Dump a traced two-input ADD function and verify the dump metadata.

    After three traced calls that must match the ``f.__wrapped__`` result,
    the function is dumped to an in-memory buffer. The returned dump info
    must report three operators and the expected input and output names.
    The dump is then evaluated with ``cgtools.load_and_inference`` and its
    first result compared with the reference value.
    """

    @trace(symbolic=True, capture_as_const=True)
    def f(a, b):
        (total,) = apply(ops.Elemwise(Elemwise.Mode.ADD), a, b)
        return total

    lhs = as_raw_tensor([2]).numpy()
    rhs = as_raw_tensor([4]).numpy()
    expected = f.__wrapped__(as_raw_tensor(lhs), as_raw_tensor(rhs)).numpy()

    for _ in range(3):
        traced_out = f(as_raw_tensor(lhs), as_raw_tensor(rhs)).numpy()
        np.testing.assert_equal(traced_out, expected)

    buffer = io.BytesIO()
    dump_info = f.dump(buffer)
    assert dump_info.nr_opr == 3
    np.testing.assert_equal(dump_info.inputs, ["h2d[0]", "h2d[2]"])
    np.testing.assert_equal(dump_info.outputs, ["ADD(h2d[0],h2d[2])[4]"])

    buffer.seek(0)  # rewind so the loader reads from the start
    loaded_results = cgtools.load_and_inference(buffer, [lhs, rhs])
    np.testing.assert_equal(loaded_results[0], expected)
def make_dev_tensor(value, dtype=None, device=None):
    """Wrap ``value`` with ``as_raw_tensor`` and return its ``_dev_tensor()``.

    :param value: data forwarded to ``as_raw_tensor``
    :param dtype: forwarded unchanged to ``as_raw_tensor``
    :param device: forwarded unchanged to ``as_raw_tensor``
    :return: the result of calling ``_dev_tensor()`` on the raw tensor
    """
    raw = as_raw_tensor(value, dtype=dtype, device=device)
    return raw._dev_tensor()
def test_broadcast():
    """Check ``broadcast`` of a ``(1, 10)`` array to ``(10, 10)``.

    The result must equal the input row repeated ten times along axis 0.
    """
    host = np.arange(10).reshape(1, 10).astype("int32")
    tensor = as_raw_tensor(host)

    (broadcasted,) = broadcast(tensor, (10, 10))

    expected = np.repeat(host, 10, 0)
    np.testing.assert_equal(expected, broadcasted.numpy())
def test_transpose():
    """Check ``transpose`` with the pattern ``[1, -1, 0]``.

    For a ``(2, 5)`` input, the result must equal the NumPy transpose with a
    new axis inserted at position 1.
    """
    host = np.arange(10).reshape(2, 5).astype("int32")
    tensor = as_raw_tensor(host)

    (transposed,) = transpose(tensor, pattern=[1, -1, 0])

    expected = np.expand_dims(host.transpose(), axis=1)
    np.testing.assert_equal(expected, transposed.numpy())
def as_tensor(val, device):
    """Convert ``val`` to a raw tensor on an explicitly given device.

    :param val: value forwarded to ``as_raw_tensor``
    :param device: target device; must not be ``None``
    :return: the result of ``as_raw_tensor(val, device=device)``
    :raises AssertionError: if ``device`` is ``None``
    """
    assert device is not None, "can not infer device"

    # TODO: should copy to appropriate device
    return as_raw_tensor(val, device=device)
def as_tensor(v):
    """Return ``v`` as a ``RawTensor``, converting index data to int32.

    A ``RawTensor`` is returned unchanged. Anything else is converted to a
    contiguous int32 array, checked to be numerically equal to the original
    value, and wrapped with ``as_raw_tensor``.

    :param v: a ``RawTensor`` or array-like index data
    :return: ``v`` itself, or a raw tensor built from the int32 copy
    :raises AssertionError: if the int32 conversion changed any value
    """
    if isinstance(v, RawTensor):
        return v

    as_int32 = np.ascontiguousarray(v, dtype=np.int32)
    # A non-zero difference means the int32 conversion altered the index.
    largest_deviation = np.abs(as_int32 - v).max()
    assert largest_deviation == 0, "bad index: {!r}".format(v)
    return as_raw_tensor(as_int32)
def as_tensor(x):
    """Wrap ``x`` in a ``Tensor`` that lives on the default device.

    The device comes from ``mge.device.get_default_device()``. ``x`` is first
    converted with ``as_raw_tensor`` on that device, and the result is then
    passed to ``Tensor``.

    :param x: value forwarded to ``as_raw_tensor``
    :return: a ``Tensor`` built from the raw tensor
    """
    default_device = mge.device.get_default_device()
    raw = as_raw_tensor(x, device=default_device)
    return Tensor(raw)