def verify_concatenate(shapes, axis):
    tensor_l = []
    for i, shape in enumerate(shapes):
        tensor_l.append(tvm.placeholder(shape, name="A" + str(i)))
    out_tensor = topi.concatenate(a_tuple=tensor_l, axis=axis)
    s = topi.cuda.schedule_injective(out_tensor)

    def check_device(device):
        if not tvm.module.enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0)
        foo = tvm.build(s, tensor_l + [out_tensor], device, name="concatenate")
        data_npys = [np.random.normal(size=shape).astype(tensor_l[0].dtype)
                     for shape in shapes]
        out_npy = np.concatenate(data_npys, axis=axis)
        data_nds = [tvm.nd.array(data_npy, ctx) for data_npy in data_npys]
        out_nd = tvm.nd.empty(out_npy.shape, ctx=ctx, dtype=out_tensor.dtype)
        foo(*(data_nds + [out_nd]))
        np.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    check_device("cuda")
    check_device("opencl")
    check_device("metal")
def verify_concatenate(shapes, axis):
    tensor_l = []
    for i, shape in enumerate(shapes):
        tensor_l.append(tvm.placeholder(shape, name="A" + str(i)))
    out_tensor = topi.concatenate(a_tuple=tensor_l, axis=axis)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_injective(out_tensor)
        foo = tvm.build(s, tensor_l + [out_tensor], device, name="concatenate")
        data_npys = [np.random.normal(size=shape).astype(tensor_l[0].dtype)
                     for shape in shapes]
        out_npy = np.concatenate(data_npys, axis=axis)
        data_nds = [tvm.nd.array(data_npy, ctx) for data_npy in data_npys]
        out_nd = tvm.nd.empty(out_npy.shape, ctx=ctx, dtype=out_tensor.dtype)
        foo(*(data_nds + [out_nd]))
        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for device in get_all_backend():
        check_device(device)
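# A driver along the following lines (the shapes and axes here are illustrative,
# not taken from the original test file) can exercise verify_concatenate over a
# few representative cases; any set of shapes that agree on every dimension
# except the concatenation axis is valid.
def test_concatenate():
    verify_concatenate([(2,), (3,), (4,)], axis=0)
    verify_concatenate([(2, 3, 4), (2, 2, 4), (2, 5, 4)], axis=1)
    verify_concatenate([(5, 6, 7, 3), (5, 6, 7, 8)], axis=3)

if __name__ == "__main__":
    test_concatenate()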
def internel_lltm(input, weight_for_gate, bias_for_gate, old_h, old_c):
    '''
    input: [batch_size, 28*28]
    old_h & old_c: [batch_size, state_size]
    >>>>> cat -> X: [batch_size, state_size+28*28]
    weight_for_gate: [3*state_size, state_size+28*28]
    bias_for_gate: [3*state_size]
    '''
    X = topi.concatenate([old_h, input], axis=1)
    gate_weights = topi.nn.dense(X, weight_for_gate, bias_for_gate)
    gates = topi.split(gate_weights, 3, axis=1)
    input_gate = topi.sigmoid(gates[0])
    output_gate = topi.sigmoid(gates[1])
    candidate_cell = elu(gates[2])
    new_c = topi.add(old_c, topi.multiply(candidate_cell, input_gate))
    new_h = topi.multiply(topi.tanh(new_c), output_gate)
    return [new_h, new_c]
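# The elu() used above is not defined in the snippet. A minimal sketch of what
# it could look like, built only from existing topi broadcast/elementwise ops
# (the helper in the original code may well differ), is:
def elu(data, alpha=1.0):
    # ELU(x) = x for x > 0, alpha * (exp(x) - 1) otherwise
    pos = topi.maximum(data, 0.0)
    neg = topi.minimum(data, 0.0)
    return topi.add(pos, topi.multiply(topi.subtract(topi.exp(neg), 1.0), alpha))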
def Concat(device="llvm", lib_path="./",
           ndim=None, dtype=None, input_num=None, axis=None):
    '''
    concat
    Args:
        device: target backend to build for
        lib_path: directory the generated library is written to
        ndim: number of dimensions of each input
        dtype: data type of the inputs
        input_num: number of tensors to concatenate
        axis: axis along which to concatenate
    Returns:
    '''
    if axis >= ndim:
        return
    shapes = []
    for i in range(input_num):
        shape = []
        for j in range(ndim):
            if j == axis:
                shape.append(tvm.var("axis" + str(i)))
            else:
                shape.append(tvm.var("n" + str(j)))
        shapes.append(shape)
    in_tensor = [tvm.placeholder(shape, dtype=dtype, name='in_tensor%d' % i)
                 for i, shape in enumerate(shapes)]
    opname = "Concat_ndim%d_%s_input_num%d_axis%d" % (ndim, dtype, input_num, axis)
    print(opname)

    # define compute
    out_tensor = topi.concatenate(tuple(in_tensor), axis)
    tensor_list = in_tensor + [out_tensor]
    if ndim < 5:
        s = topi.generic.schedule_concatenate(out_tensor)
    else:
        s = tvm.create_schedule(out_tensor.op)
    Genlib(s, tensor_list, device, opname, lib_path)
def Stack(device="llvm", lib_path="./",
          ndim=None, dtype=None, input_num=None, axis=None):
    '''
    stack
    Args:
        device: target backend to build for
        lib_path: directory the generated library is written to
        ndim: number of dimensions of each input
        dtype: data type of the inputs
        input_num: number of tensors to stack
        axis: axis along which to stack
    Returns:
    '''
    if axis > ndim:
        return
    shape = [tvm.var("n" + str(i)) for i in range(ndim)]
    shapes = [shape] * input_num
    in_tensor = [tvm.placeholder(shape, dtype=dtype, name='in_tensor%d' % i)
                 for i, shape in enumerate(shapes)]
    opname = "Stack_ndim%d_%s_input_num%d_axis%d" % (ndim, dtype, input_num, axis)
    print(opname)

    # stack = expand_dims on every input, then concatenate along the new axis
    input_tensor = [topi.expand_dims(ai, axis) for ai in in_tensor]
    out_tensor = topi.concatenate(tuple(input_tensor), axis=axis)
    tensor_list = in_tensor + [out_tensor]
    if ndim < 4:
        s = topi.generic.schedule_concatenate(out_tensor)
    else:
        s = tvm.create_schedule(out_tensor.op)
    Genlib(s, tensor_list, device, opname, lib_path)
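# Genlib() is external to the two snippets above. Assuming it simply builds the
# scheduled op for the requested target and exports a shared library into
# lib_path, a minimal stand-in could be:
import os

def Genlib(s, tensor_list, device, opname, lib_path):
    func = tvm.build(s, tensor_list, target=device, name=opname)
    func.export_library(os.path.join(lib_path, opname + ".so"))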
def concatenate_compute(attrs, inputs, output_type, target):
    return [topi.concatenate(inputs, axis=attrs.axis)]
y = tvm.placeholder((batch_size, num_classes), 'float32')
s = tvm.placeholder((batch_size, num_hidden), 'float32')
h = tvm.placeholder((batch_size, num_hidden), 'float32')

# Tensors and vars for training graph
weights = [tvm.placeholder(x, 'float32') for x in sizes]

# Construct model
xs = topi.split(topi.reshape(x, (batch_size, num_timesteps, num_input)),
                num_timesteps, axis=1)
xs = [topi.reshape(x, (batch_size, num_input)) for x in xs]
new_s = s
new_h = h
for i in range(num_timesteps):
    inp = topi.concatenate([xs[i], new_h], 1)
    g = topi.tanh(topi.matmul(inp, weights[0]) + weights[1])
    j = topi.sigmoid(topi.matmul(inp, weights[2]) + weights[3])
    f = topi.sigmoid(topi.matmul(inp, weights[4]) + weights[5])
    o = topi.sigmoid(topi.matmul(inp, weights[6]) + weights[7])
    new_s = new_s * f + g * j
    new_h = topi.tanh(new_s) * o
logits = topi.matmul(new_h, weights[8]) + weights[9]

# compute accuracy
pred = topi.nn.softmax(logits)
correct_pred = topi.equal(topi.argmax(y, 1), topi.argmax(pred, 1))
accuracy = topi.sum(correct_pred.astype('float32')) / batch_size
def compute_concatenate(attrs, inputs, out_info):
    """Compute definition of concatenate"""
    axis = attrs.get_int("axis")
    return topi.concatenate([x for x in inputs], axis=axis)
import topi
import tvm
import numpy as np
import torch

dim0 = 8
dim1 = 3
dim2 = 4
shape_size1 = [dim0, dim1]
shape_size2 = [dim0, dim2]
dtype = "float32"

A = tvm.te.placeholder(shape_size1, dtype=dtype, name="A")
B = tvm.te.placeholder(shape_size2, dtype=dtype, name="B")
C = topi.concatenate([A, B], axis=1)
dC = tvm.te.placeholder(C.shape, dtype=dtype, name="dC")
dA, dB = tvm.te.mygradient(C, [A, B], dC)

s = tvm.te.create_schedule([C.op, dA.op, dB.op])
print(tvm.lower(s, [A, B, dC, dA, dB], simple_mode=True))
func = tvm.build(s, [A, B, dC, dA, dB], target="llvm")

A_np = np.random.uniform(-10, 10, shape_size1).astype("float32")
B_np = np.random.uniform(-10, 10, shape_size2).astype("float32")
dC_np = np.ones([dim0, dim1 + dim2]).astype("float32")
dA_np = np.zeros(shape_size1).astype("float32")
dB_np = np.zeros(shape_size2).astype("float32")
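# A possible continuation (not shown in the original snippet): run the built
# function and check the gradients of concatenate, which analytically are just
# the corresponding slices of dC along axis 1. The torch import above suggests
# the original instead verified against PyTorch autograd.
ctx = tvm.cpu(0)
A_tvm = tvm.nd.array(A_np, ctx)
B_tvm = tvm.nd.array(B_np, ctx)
dC_tvm = tvm.nd.array(dC_np, ctx)
dA_tvm = tvm.nd.array(dA_np, ctx)
dB_tvm = tvm.nd.array(dB_np, ctx)
func(A_tvm, B_tvm, dC_tvm, dA_tvm, dB_tvm)
np.testing.assert_allclose(dA_tvm.asnumpy(), dC_np[:, :dim1], rtol=1e-5)
np.testing.assert_allclose(dB_tvm.asnumpy(), dC_np[:, dim1:], rtol=1e-5)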