Example #1
    def init_resource_arg_max(self):
        # operator type and attribute: take the arg max along axis 0
        self.op_type_name = "ArgMaxD"
        self.op_attr = acl.op.create_attr()
        ret = acl.op.set_attr_int(self.op_attr, "dimension", 0)
        check_ret("acl.op.set_attr_int", ret)

        # tensor descriptions of the arg_max operator:
        # float16 input and int32 output, both 1-D in ND format
        self.input_desc_arg_max_d = \
            acl.create_tensor_desc(ACL_FLOAT16,
                                   [self.input_shape, ],
                                   ACL_FORMAT_ND)
        self.output_desc_arg_max_d = \
            acl.create_tensor_desc(ACL_INT32,
                                   [self.output_shape, ],
                                   ACL_FORMAT_ND)

        # allocate device memory for the output and wrap it in a data buffer
        self.tensor_size_arg_max_d = \
            acl.get_tensor_desc_size(self.output_desc_arg_max_d)
        self.dev_buffer_arg_max_d, ret = \
            acl.rt.malloc(self.tensor_size_arg_max_d,
                          ACL_MEM_MALLOC_NORMAL_ONLY)
        check_ret("acl.rt.malloc", ret)

        self.output_buffer_arg_max_d = \
            acl.create_data_buffer(self.dev_buffer_arg_max_d,
                                   self.tensor_size_arg_max_d)
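
Example #1 only prepares the ArgMaxD descriptors, attribute, and output buffer; the matching execute step would look roughly like the sketch below. It assumes a hypothetical run_arg_max method, an input_buffer that already wraps the float16 input in device memory, and a self.stream created during resource initialization, and that the single-operator model directory has been registered with acl.op.set_model_dir (as in Example #2); none of these names appear in the snippet above.

    def run_arg_max(self, input_buffer):
        # assumed: input_buffer wraps the float16 input already copied to
        # the device, and self.stream was created during init
        ret = acl.op.execute_v2(
            self.op_type_name,
            [self.input_desc_arg_max_d],
            [input_buffer],
            [self.output_desc_arg_max_d],
            [self.output_buffer_arg_max_d],
            self.op_attr,
            self.stream)
        check_ret("acl.op.execute_v2", ret)
        ret = acl.rt.synchronize_stream(self.stream)
        check_ret("acl.rt.synchronize_stream", ret)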
Example #2
def forward_op_batch_matmul(data, stream):
    # register the directory that holds the compiled single-operator model
    ret = acl.op.set_model_dir(MODEL_MATMUL_PATH)
    check_ret("acl.op.set_model_dir", ret)

    # BatchMatMul attributes: neither input is transposed
    op_attr = acl.op.create_attr()
    ret = acl.op.set_attr_bool(op_attr, "adj_x1", False)
    check_ret("acl.op.set_attr_bool", ret)
    ret = acl.op.set_attr_bool(op_attr, "adj_x2", False)
    check_ret("acl.op.set_attr_bool", ret)

    # tensor descriptions of the two inputs and the output
    input_desc_batch_matmul_x1 = \
        acl.create_tensor_desc(ACL_FLOAT,
                               [1, 1, 1024, 1024],
                               ACL_FORMAT_NCHW)
    input_desc_batch_matmul_x2 = \
        acl.create_tensor_desc(ACL_FLOAT,
                               [1, 1, 1024, 27648],
                               ACL_FORMAT_NCHW)
    output_desc_batch_matmul_y = \
        acl.create_tensor_desc(ACL_FLOAT,
                               [1, 1, 1024, 27648],
                               ACL_FORMAT_NCHW)
    tensor_size_batch_matmul_x1 = \
        acl.get_tensor_desc_size(input_desc_batch_matmul_x1)
    tensor_size_batch_matmul_x2 = \
        acl.get_tensor_desc_size(input_desc_batch_matmul_x2)
    tensor_size_batch_matmul_y = \
        acl.get_tensor_desc_size(output_desc_batch_matmul_y)

    # wrap the two host inputs in device data buffers (create_input is a
    # helper defined elsewhere in the sample)
    input_buffer_x1 = create_input(data[0], tensor_size_batch_matmul_x1)
    input_buffer_x2 = create_input(data[1], tensor_size_batch_matmul_x2)

    # allocate device memory for the output and wrap it in a data buffer
    dev_buffer_batch_matmul, ret = \
        acl.rt.malloc(tensor_size_batch_matmul_y,
                      ACL_MEM_MALLOC_NORMAL_ONLY)
    check_ret("acl.rt.malloc", ret)

    output_buffer_batch_matmul_y = \
        acl.create_data_buffer(dev_buffer_batch_matmul,
                               tensor_size_batch_matmul_y)

    # launch the operator on the stream and wait for it to finish
    ret = acl.op.execute_v2(
        OP_TYPE,
        [input_desc_batch_matmul_x1, input_desc_batch_matmul_x2],
        [input_buffer_x1, input_buffer_x2],
        [output_desc_batch_matmul_y],
        [output_buffer_batch_matmul_y],
        op_attr,
        stream)
    check_ret("acl.op.execute_v2", ret)
    ret = acl.rt.synchronize_stream(stream)
    check_ret("acl.rt.synchronize_stream", ret)
    print("[SingleOp] batch_matmul run success")
    return get_forward_result(dev_buffer_batch_matmul, tensor_size_batch_matmul_y)
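
create_input and get_forward_result are referenced above but not defined in this snippet. The following is a rough sketch of what such helpers usually do in pyACL single-op samples; the bodies are assumptions rather than the original implementations, and they use the newer acl.util.bytes_to_ptr / acl.util.ptr_to_bytes interface (older pyACL builds expose acl.util.numpy_to_ptr / acl.util.ptr_to_numpy instead, as Example #5 shows for the host-to-device direction).

def create_input(host_data, size):
    # assumed helper: copy one host ndarray to device memory and wrap it
    # in a data buffer that acl.op.execute_v2 can consume
    dev_ptr, ret = acl.rt.malloc(size, ACL_MEM_MALLOC_NORMAL_ONLY)
    check_ret("acl.rt.malloc", ret)
    bytes_data = host_data.tobytes()
    host_ptr = acl.util.bytes_to_ptr(bytes_data)
    ret = acl.rt.memcpy(dev_ptr, size, host_ptr, size,
                        ACL_MEMCPY_HOST_TO_DEVICE)
    check_ret("acl.rt.memcpy", ret)
    return acl.create_data_buffer(dev_ptr, size)


def get_forward_result(dev_ptr, size):
    # assumed helper: copy the device output back to the host and return
    # the raw bytes (convert with numpy.frombuffer as needed)
    host_ptr, ret = acl.rt.malloc_host(size)
    check_ret("acl.rt.malloc_host", ret)
    ret = acl.rt.memcpy(host_ptr, size, dev_ptr, size,
                        ACL_MEMCPY_DEVICE_TO_HOST)
    check_ret("acl.rt.memcpy", ret)
    result = acl.util.ptr_to_bytes(host_ptr, size)
    ret = acl.rt.free_host(host_ptr)
    check_ret("acl.rt.free_host", ret)
    return result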
Example #3
    def init_resource_cast(self):
        # settings of cast operator
        self._input_desc = acl.create_tensor_desc(ACL_FLOAT,
                                                  [self.input_shape],
                                                  ACL_FORMAT_ND)
        self._output_desc = acl.create_tensor_desc(ACL_FLOAT16,
                                                   [self.input_shape],
                                                   ACL_FORMAT_ND)

        tensor_size = acl.get_tensor_desc_size(self._output_desc)
        self.dev_buffer_cast, ret = acl.rt.malloc(tensor_size,
                                                  ACL_MEM_MALLOC_NORMAL_ONLY)
        check_ret("acl.rt.malloc", ret)

        self.output_buffer_cast = acl.create_data_buffer(self.dev_buffer_cast,
                                                         tensor_size)
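
Example #3 only sets up the cast descriptors and the output buffer. A minimal sketch of the execution step is given below; it assumes that pyACL exposes acl.op.cast (the wrapper around aclopCast), that input_buffer already wraps the float32 input in device memory, and that self.stream exists, none of which is shown in the snippet above.

    def run_cast(self, input_buffer):
        # assumed: cast float32 -> float16 via the aclopCast wrapper;
        # the fifth argument is the truncate flag (0 = default rounding)
        ret = acl.op.cast(self._input_desc,
                          input_buffer,
                          self._output_desc,
                          self.output_buffer_cast,
                          0,
                          self.stream)
        check_ret("acl.op.cast", ret)
        ret = acl.rt.synchronize_stream(self.stream)
        check_ret("acl.rt.synchronize_stream", ret)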
Example #4
    def _gen_output_tensor(self):
        print("gen output data stage:")
        self.operator_output = acl.create_tensor_desc(
            acl_dtype[self.data_type], self.shape, self.format_type)
        for factor in [self.operator_output]:
            # for each output: allocate device memory, wrap it in a data
            # buffer, and reserve a host buffer for the later readback
            factor_size = acl.get_tensor_desc_size(factor)
            factor_device, ret = acl.rt.malloc(factor_size,
                                               ACL_MEM_MALLOC_NORMAL_ONLY)
            check_ret("acl.rt.malloc", ret)
            self.device_outputs.append(factor_device)
            self.device_buffer_outputs.append(
                acl.create_data_buffer(factor_device, factor_size))
            self.host_outputs.append(acl.rt.malloc_host(factor_size)[0])
            self.output_desc.append(factor)
        print("gen output data success")
Example #5
    def _gen_input_tensor(self):
        print("gen input data stage:")
        for factor in [self.factor_a, self.factor_b]:
            tensor = acl.create_tensor_desc(acl_dtype[self.data_type],
                                            self.shape, self.format_type)
            factor_size = acl.get_tensor_desc_size(tensor)
            factor_device, ret = acl.rt.malloc(factor_size,
                                               ACL_MEM_MALLOC_NORMAL_ONLY)
            check_ret("acl.rt.malloc", ret)

            # newer pyACL exposes acl.util.bytes_to_ptr; older versions
            # convert the numpy input directly with acl.util.numpy_to_ptr
            if "bytes_to_ptr" in dir(acl.util):
                bytes_data = factor.tobytes()
                factor_ptr = acl.util.bytes_to_ptr(bytes_data)
            else:
                factor_ptr = acl.util.numpy_to_ptr(factor)

            # copy the host data to the device and wrap it in a data buffer
            ret = acl.rt.memcpy(factor_device, factor_size, factor_ptr,
                                factor_size, ACL_MEMCPY_HOST_TO_DEVICE)
            check_ret("acl.rt.memcpy", ret)
            factor_buffer = acl.create_data_buffer(factor_device, factor_size)
            self._inputs_device.append(factor_device)
            self._inputs_device_buffer.append(factor_buffer)
            self._inputs_desc.append(tensor)
        print("gen input data success")