def create_data(self, axes, keep_dims):
    """Build (once) and return the buffers used by a reduction test.

    The data is created only on the first call and stored in
    ``self.reduction_data``; later calls return that cached list unchanged,
    whatever ``axes`` / ``keep_dims`` are passed.

    Args:
        axes: Iterable of axes to reduce over (negative values count from
            the end of the ``[self.m, self.n]`` input shape). A falsy value
            (``None`` or empty) means "reduce over every axis".
        keep_dims: If true, reduced axes are kept with extent 1; otherwise
            they are removed from the output shape.

    Returns:
        ``[x_data, x, out, x_arg, out_arg]`` where ``x_data`` is the numpy
        input, ``x`` / ``out`` are ``runtime.cinn_buffer_t`` objects and
        ``x_arg`` / ``out_arg`` are the matching ``runtime.cinn_pod_value_t``
        wrappers.
    """
    if not self.reduction_data:
        # Round to 2 decimals to keep the random float32 inputs coarse.
        x_data = np.around(
            np.random.randn(self.m, self.n).astype("float32"), 2)
        x = runtime.cinn_buffer_t(x_data, runtime.cinn_x86_device)

        output_shape = [self.m, self.n]
        rank = len(output_shape)
        if keep_dims:
            for axis in (axes if axes else range(rank)):
                if axis < 0:
                    axis += rank
                output_shape[axis] = 1
        elif axes:
            # Normalise against the *input* rank and drop the axes from the
            # highest index down. Popping in caller order shifts the
            # remaining indices, which broke multi-axis reductions
            # (e.g. axes=[0, 1] raised IndexError).
            normalized = {
                axis + rank if axis < 0 else axis
                for axis in axes
            }
            for axis in sorted(normalized, reverse=True):
                output_shape.pop(axis)
            if not output_shape:
                # Every axis was reduced: use the same [1] shape as the
                # "no axes given" case below.
                output_shape = [1]
        else:
            output_shape = [1]

        out = runtime.cinn_buffer_t(
            np.zeros(output_shape).astype("float32"),
            runtime.cinn_x86_device)
        self.reduction_data = [
            x_data, x, out,
            runtime.cinn_pod_value_t(x),
            runtime.cinn_pod_value_t(out)
        ]
    return self.reduction_data
def create_data(self, output_shape, trans_a, trans_b):
    """Build (once) and return the buffers used by a two-input transform test.

    The result is cached in ``self.transform_data``; once it is populated,
    later calls return it as-is and ignore their arguments.

    Args:
        output_shape: Shape of the zero-filled float32 output buffer.
        trans_a: If true the first input is generated as ``(k, m)``,
            otherwise as ``(m, k)``.
        trans_b: If true the second input is generated as ``(n, k)``,
            otherwise as ``(k, n)``.

    Returns:
        ``[x_data, y_data, x, y, out, x_arg, y_arg, out_arg]``: the two
        numpy inputs, the three ``runtime.cinn_buffer_t`` objects and their
        ``runtime.cinn_pod_value_t`` wrappers.
    """
    if self.transform_data:
        return self.transform_data

    lhs_shape = (self.k, self.m) if trans_a else (self.m, self.k)
    rhs_shape = (self.n, self.k) if trans_b else (self.k, self.n)

    # x is drawn before y so the random stream is consumed in a fixed order.
    x_data = np.around(np.random.randn(*lhs_shape).astype("float32"), 2)
    y_data = np.around(np.random.randn(*rhs_shape).astype("float32"), 2)

    device = runtime.cinn_x86_device
    x = runtime.cinn_buffer_t(x_data, device)
    y = runtime.cinn_buffer_t(y_data, device)
    zeros = np.zeros(output_shape).astype("float32")
    out = runtime.cinn_buffer_t(zeros, device)

    x_arg = runtime.cinn_pod_value_t(x)
    y_arg = runtime.cinn_pod_value_t(y)
    out_arg = runtime.cinn_pod_value_t(out)

    self.transform_data = [x_data, y_data, x, y, out, x_arg, y_arg, out_arg]
    return self.transform_data
def to_test_op(self, input_shapes, output_shape, op_name, attrs):
    '''
    Test the operator.

    Generates random float32 inputs for every shape in ``input_shapes``,
    builds ``op_name`` through ``self.__codegen`` and ``self.compiler``,
    runs the compiled function and asserts that the last output buffer
    matches the reference from ``self.create_target_data`` (``atol=1e-4``).

    Args:
        input_shapes: One shape per operator input.
        output_shape: One shape per operator output, or ``None``. When
            ``None``, ``self.create_target_data`` is expected to return a
            ``(correct_result, output_shape)`` pair instead of only the
            reference result.
        op_name: Name passed to ``self.__codegen`` and used to look up the
            compiled function.
        attrs: Operator attributes forwarded to ``self.__codegen`` and
            ``self.create_target_data``.
    '''
    self.compiler = cinn.Compiler.create(self.target)

    inputs = []
    inputs_data = []
    for i_shape in input_shapes:
        # Keep data generation and placeholder naming in the same loop so
        # the order of these calls per input stays as it was.
        inputs_data.append(
            np.around(np.random.random(i_shape).astype("float32"), 3))
        expr_shape = [ir.Expr(dim_shape) for dim_shape in i_shape]
        inputs.append(
            lang.Placeholder("float32", self.__gen_var_name(),
                             expr_shape).to_tensor())

    # The buffer list stays bound to a local until the call has finished.
    temp_inputs = [
        runtime.cinn_buffer_t(in_data, runtime.cinn_x86_device)
        for in_data in inputs_data
    ]
    args = [runtime.cinn_pod_value_t(in_buf) for in_buf in temp_inputs]

    # Identity check: `== None` would compare elementwise if a numpy array
    # were passed as output_shape.
    if output_shape is None:
        correct_result, output_shape = self.create_target_data(
            inputs_data, attrs)
    else:
        correct_result = self.create_target_data(inputs_data, attrs)

    module = self.__codegen(op_name, inputs, attrs)
    self.compiler.build(module)
    fn = self.compiler.lookup(op_name)

    out = [
        runtime.cinn_buffer_t(
            np.zeros(out_shape).astype("float32"), runtime.cinn_x86_device)
        for out_shape in output_shape
    ]
    args.extend(runtime.cinn_pod_value_t(out_data) for out_data in out)

    fn(args)

    # Only the last output buffer is compared against the reference.
    out_result = out[-1].numpy()
    self.assertTrue(np.allclose(out_result, correct_result, atol=1e-4))
def create_data(self, dtype):
    """Build (once) and return the buffers used by a unary-op test.

    The result is cached in ``self.unary_data``; once populated, later calls
    return it unchanged even if a different ``dtype`` is requested.

    Args:
        dtype: Numpy dtype used for both the random ``(m, n)`` input and the
            zero-filled ``(m, n)`` output.

    Returns:
        ``[x_data, x, out, x_arg, out_arg]``: the numpy input, the input and
        output ``runtime.cinn_buffer_t`` objects and their
        ``runtime.cinn_pod_value_t`` wrappers.
    """
    if self.unary_data:
        return self.unary_data

    rows, cols = self.m, self.n
    device = runtime.cinn_x86_device

    # Round to 2 decimals to keep the random inputs coarse.
    x_data = np.around(np.random.randn(rows, cols).astype(dtype), 2)
    x = runtime.cinn_buffer_t(x_data, device)

    zeros = np.zeros([rows, cols]).astype(dtype)
    out = runtime.cinn_buffer_t(zeros, device)

    x_arg = runtime.cinn_pod_value_t(x)
    out_arg = runtime.cinn_pod_value_t(out)

    self.unary_data = [x_data, x, out, x_arg, out_arg]
    return self.unary_data
def create_data(m, n, k, bn):
    """Create input, output and reference buffers for an (m, k) @ (k, n) test.

    Args:
        m: Row count of ``a`` and of the result.
        n: Column count of ``b`` and of the result.
        k: Shared inner dimension.
        bn: Unused. Kept so existing callers keep working; it previously
            only sized a packed-B scratch buffer that was allocated here but
            never returned or used.

    Returns:
        ``[a, b, c, c_target, a_arg, b_arg, c_arg]`` where ``a``, ``b``,
        ``c`` and ``c_target`` are ``runtime.cinn_buffer_t`` objects
        (``c`` is zero-filled, ``c_target`` holds ``a @ b`` computed with
        numpy) and the ``*_arg`` entries are ``runtime.cinn_pod_value_t``
        wrappers for ``a``, ``b`` and ``c``.
    """
    # Round to 2 decimals to lower numpy's float precision so that results
    # do not vary too much from C's float precision.
    a_init = np.around(np.random.randn(m, k).astype("float32"), 2)
    b_init = np.around(np.random.randn(k, n).astype("float32"), 2)

    a = runtime.cinn_buffer_t(a_init, runtime.cinn_x86_device)
    b = runtime.cinn_buffer_t(b_init, runtime.cinn_x86_device)
    c = runtime.cinn_buffer_t(
        np.zeros([m, n]).astype("float32"), runtime.cinn_x86_device)
    # The reference is computed from the buffers' own contents.
    c_target = runtime.cinn_buffer_t(a.numpy() @ b.numpy(),
                                     runtime.cinn_x86_device)

    a_arg = runtime.cinn_pod_value_t(a)
    b_arg = runtime.cinn_pod_value_t(b)
    c_arg = runtime.cinn_pod_value_t(c)
    return [a, b, c, c_target, a_arg, b_arg, c_arg]