def verify_split(src_shape, indices_or_sections, axis):
    """Check ``topi.split`` against ``np.split`` on the GPU backends.

    A placeholder of shape ``src_shape`` is split with ``topi.split``, the
    result is scheduled with ``topi.cuda.schedule_injective``, and the built
    function is run on "cuda", "opencl" and "metal" (each skipped when the
    backend is not enabled).  Every output is compared with the matching
    piece produced by ``np.split`` on the same random input.

    Parameters
    ----------
    src_shape : tuple of int
        Shape of the input placeholder and of the random test data.
    indices_or_sections : int or sequence of int
        Forwarded unchanged to both ``topi.split`` and ``np.split``.
    axis : int
        Axis along which to split.
    """
    A = tvm.placeholder(shape=src_shape, name="A")
    tensor_l = topi.split(A, indices_or_sections, axis=axis)
    s = topi.cuda.schedule_injective(tensor_l)

    def check_device(device):
        """Build for ``device``, run the split and compare with NumPy."""
        if not tvm.module.enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        # Derive the context from the device name.  The previous
        # "gpu if cuda else cl" shortcut handed an OpenCL context to the
        # "metal" target.
        ctx = tvm.context(device, 0)
        # list(...) keeps the concatenation valid whether topi.split returns
        # a list or another sequence type.
        foo = tvm.build(s, [A] + list(tensor_l), device, name="split")
        data_npy = np.random.normal(size=src_shape).astype(A.dtype)
        out_npys = np.split(data_npy, indices_or_sections, axis=axis)
        data_nd = tvm.nd.array(data_npy, ctx)
        out_nds = [
            tvm.nd.empty(out_npy.shape, ctx=ctx, dtype=tensor_l[0].dtype)
            for out_npy in out_npys
        ]
        foo(*([data_nd] + out_nds))
        for out_nd, out_npy in zip(out_nds, out_npys):
            np.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for device in ("cuda", "opencl", "metal"):
        check_device(device)
def verify_split(src_shape, indices_or_sections, axis):
    """Compare ``topi.split`` with ``np.split`` on every available backend.

    Parameters
    ----------
    src_shape : tuple of int
        Shape of the input placeholder and of the random test data.
    indices_or_sections : int or sequence of int
        Passed as-is to both ``topi.split`` and ``np.split``.
    axis : int
        Axis along which to split.
    """
    source = tvm.placeholder(shape=src_shape, name="A")
    pieces = topi.split(source, indices_or_sections, axis=axis)

    def check_device(device):
        """Schedule, build and run the split on ``device``, then verify it."""
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return

        print("Running on target: %s" % device)
        # The generic schedule dispatches on the target active in this scope.
        with tvm.target.create(device):
            schedule = topi.generic.schedule_injective(pieces)
        split_fn = tvm.build(
            schedule, [source] + list(pieces), device, name="split"
        )

        # Reference result computed by NumPy on the same random input.
        host_input = np.random.normal(size=src_shape).astype(source.dtype)
        expected = np.split(host_input, indices_or_sections, axis=axis)

        dev_input = tvm.nd.array(host_input, ctx)
        dev_outputs = []
        for ref in expected:
            dev_outputs.append(
                tvm.nd.empty(ref.shape, ctx=ctx, dtype=pieces[0].dtype)
            )

        split_fn(*([dev_input] + dev_outputs))

        for got, ref in zip(dev_outputs, expected):
            tvm.testing.assert_allclose(got.asnumpy(), ref)

    for device in get_all_backend():
        check_device(device)
def verify_split(src_shape, indices_or_sections, axis):
    """Validate ``topi.split`` against ``np.split`` on a fixed device list.

    The devices tried are "llvm", "nvptx", "cuda", "opencl", "metal" and
    "rocm"; any that ``tvm.module.enabled`` reports as unavailable is
    skipped with a message.

    Parameters
    ----------
    src_shape : tuple of int
        Shape of the input placeholder and of the random test data.
    indices_or_sections : int or sequence of int
        Passed as-is to both ``topi.split`` and ``np.split``.
    axis : int
        Axis along which to split.
    """
    A = tvm.placeholder(shape=src_shape, name="A")
    outputs = topi.split(A, indices_or_sections, axis=axis)

    def check_device(device):
        """Build the split for ``device``, execute it and check the results."""
        if not tvm.module.enabled(device):
            print("Skip because %s is not enabled" % device)
            return

        # Pick the injective schedule registered for the active target.
        with tvm.target.create(device):
            sched = topi.generic.schedule_injective(outputs)

        ctx = tvm.context(device, 0)
        build_args = [A] + outputs
        func = tvm.build(sched, build_args, device, name="split")

        np_in = np.random.normal(size=src_shape).astype(A.dtype)
        np_outs = np.split(np_in, indices_or_sections, axis=axis)

        nd_in = tvm.nd.array(np_in, ctx)
        nd_outs = [
            tvm.nd.empty(ref.shape, ctx=ctx, dtype=outputs[0].dtype)
            for ref in np_outs
        ]

        call_args = [nd_in] + nd_outs
        func(*call_args)

        for actual, ref in zip(nd_outs, np_outs):
            np.testing.assert_allclose(actual.asnumpy(), ref)

    for device in ["llvm", "nvptx", "cuda", "opencl", "metal", "rocm"]:
        check_device(device)
def compute_split(attrs, inputs, out_info):
    """Compute definition of the split operator.

    ``attrs["indices_or_sections"]`` is read as a string: when it begins with
    "(" or "[" it is parsed as a tuple of ints, otherwise as a single int.
    The first input is then split with ``topi.split`` along the integer
    ``axis`` attribute.  ``out_info`` is not used.
    """
    raw_spec = attrs["indices_or_sections"]
    looks_like_sequence = raw_spec.startswith(("(", "["))

    if looks_like_sequence:
        split_spec = attrs.get_int_tuple("indices_or_sections")
    else:
        split_spec = attrs.get_int("indices_or_sections")

    data = inputs[0]
    split_axis = attrs.get_int("axis")
    return topi.split(data, split_spec, axis=split_axis)
def internel_lltm(input, weight_for_gate, bias_for_gate, old_h, old_c):
    """Build one LLTM-style recurrent step from TOPI operators.

    Expected shapes (as stated by the original author):
        input:            [batch_size, 28*28]
        old_h, old_c:     [batch_size, state_size]
        weight_for_gate:  [3*state_size, state_size+28*28]
        bias_for_gate:    [3*state_size]

    ``old_h`` and ``input`` are concatenated along axis 1 (hidden state
    first), passed through a dense layer, and the result is split into three
    equal gates along axis 1.

    Returns
    -------
    list
        ``[new_h, new_c]`` - the updated hidden and cell tensors.
    """
    # X: [batch_size, state_size+28*28] per the shapes above.
    joined = topi.concatenate([old_h, input], axis=1)
    dense_out = topi.nn.dense(joined, weight_for_gate, bias_for_gate)

    in_part, out_part, cand_part = topi.split(dense_out, 3, axis=1)
    input_gate = topi.sigmoid(in_part)
    output_gate = topi.sigmoid(out_part)
    candidate_cell = elu(cand_part)

    cell_update = topi.multiply(candidate_cell, input_gate)
    new_c = topi.add(old_c, cell_update)
    squashed_cell = topi.tanh(new_c)
    new_h = topi.multiply(squashed_cell, output_gate)
    return [new_h, new_c]
def verify_split(src_shape, indices_or_sections, axis):
    """Run ``topi.split`` on each backend and compare with ``np.split``.

    Parameters
    ----------
    src_shape : tuple of int
        Shape of the input placeholder and of the random test data.
    indices_or_sections : int or sequence of int
        Handed unchanged to both ``topi.split`` and ``np.split``.
    axis : int
        Axis along which to split.
    """
    A = tvm.placeholder(shape=src_shape, name="A")
    split_tensors = topi.split(A, indices_or_sections, axis=axis)

    def _allocate_like(reference, ctx):
        # Empty device buffer shaped like the NumPy reference piece.
        return tvm.nd.empty(
            reference.shape, ctx=ctx, dtype=split_tensors[0].dtype
        )

    def check_device(device):
        """Build and execute the split on ``device``; assert it matches NumPy."""
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)

        with tvm.target.create(device):
            s = topi.generic.schedule_injective(split_tensors)
        tensors = [A] + list(split_tensors)
        compiled = tvm.build(s, tensors, device, name="split")

        sample = np.random.normal(size=src_shape).astype(A.dtype)
        references = np.split(sample, indices_or_sections, axis=axis)

        device_in = tvm.nd.array(sample, ctx)
        device_outs = [_allocate_like(ref, ctx) for ref in references]
        compiled(device_in, *device_outs)

        for result, ref in zip(device_outs, references):
            tvm.testing.assert_allclose(result.asnumpy(), ref)

    for backend in get_all_backend():
        check_device(backend)
(np.zeros, 'shape'), (np.zeros, 'shape'), (np.ones, 'shape'), (np.zeros, 'shape'), (np.zeros, 'shape'), (np.random.normal, 'size'), (np.random.normal, 'size')] # Graph input x = tvm.placeholder((batch_size, num_timesteps * num_input), 'float32') y = tvm.placeholder((batch_size, num_classes), 'float32') s = tvm.placeholder((batch_size, num_hidden), 'float32') h = tvm.placeholder((batch_size, num_hidden), 'float32') # Tensors and vars for training graph weights = [tvm.placeholder(x, 'float32') for x in sizes] #Construct model xs = topi.split(topi.reshape(x, (batch_size, num_timesteps, num_input)), num_timesteps, axis=1) xs = [topi.reshape(x, (batch_size, num_input)) for x in xs] new_s = s new_h = h for i in range(num_timesteps): inp = topi.concatenate([xs[i], new_h], 1) g = topi.tanh(topi.matmul(inp, weights[0]) + weights[1]) j = topi.sigmoid(topi.matmul(inp, weights[2]) + weights[3]) f = topi.sigmoid(topi.matmul(inp, weights[4]) + weights[5]) o = topi.sigmoid(topi.matmul(inp, weights[6]) + weights[7]) new_s = new_s * f + g * j new_h = topi.tanh(new_s) * o logits = topi.matmul(new_h, weights[8]) + weights[9]