Esempio n. 1
0
def gen_op_var(inputs, output, op_var):
    """
    Gather the inputs and outputs of the op into one variable list.

    The inputs are added to ``op_var`` in place; an input that is itself a
    list is flattened by one level. Outputs accepted by
    ``TensorUtils.is_output_value`` are then concatenated onto a new list.

    Args:
        inputs(list): the inputs of op; each entry is a single value or a list of values.
        output(list): the outputs of op (a list, a tuple or a single value).
        op_var (list): accumulator for the op variables; extended in place with the inputs.

    Returns:
        list, the content of op_var followed by the qualifying outputs.
    """
    for entry in inputs:
        if isinstance(entry, list):
            op_var.extend(entry)
        else:
            op_var.append(entry)

    if isinstance(output, (list, tuple)):
        return op_var + [out for out in output if TensorUtils.is_output_value(out)]
    if TensorUtils.is_output_value(output):
        return op_var + [output]
    # a single output that does not qualify: hand back the accumulator itself
    return op_var
Esempio n. 2
0
def op_build(op_func,
             input_shapes,
             input_types,
             op_attrs=None,
             kernel_name="",
             attrs=None,
             log_cce=False,
             dump_ir=True,
             dump_cce=True,
             polyhedral=True,
             tuning=False):
    """
    Return module built from op_func with given inputs.

    Args:
        op_func (function returning an op or (op, [op_vars])): The op build function.
        input_shapes(iterable of iterable of int): the dim sizes for input for op.
        input_types (iterable of iterable of str): the dtypes for each input.
        op_attrs (list or tuple): extra attributes for the op.
        kernel_name (str): name of op. When empty, the name of op_func (or the
            'op_name' of the schedule template) is used.
        attrs (dict): tiling parameter. The dict is updated in place: 'dim' may be
            filled in, attrs returned by op_func are merged and the BINDS entry is removed.
        log_cce (bool): False by default.
        dump_ir (bool): True by default.
        dump_cce (bool): True by default.
        polyhedral (bool): True by default.
        tuning (bool): False by default.

    Return:
        module. When tuning is enabled or attrs["help_tiling"] is above the 'None' level,
        the tuple (spaces, set_dim_key) is returned instead. None is returned when
        akg.build produces no module.

    Raises:
        RuntimeError: if the entry registered in set_dim_func_map is neither a function nor a dict.
        ValueError: if op_func returns a schedule template whose target is not cuda.
    """
    inputs = []
    set_dim_key = ""
    shape_params = []
    for i, (shape, dtype) in enumerate(zip(input_shapes, input_types)):
        if isinstance(shape, (list, tuple)) and shape and isinstance(
                shape[0], (list, tuple)):
            # a list of shapes describes one list-typed input
            tmp_input = []
            for j, tmp_shape in enumerate(shape):
                tmp_input.append(
                    akg.tvm.placeholder(tmp_shape, dtype,
                                        "input_%d_%d" % (i + 1, j + 1)))
                for tmp in tmp_shape:
                    if isinstance(tmp, akg.tvm.expr.Var):
                        shape_params.append(tmp)
            inputs.append(tmp_input)
        elif isinstance(shape, (list, tuple)) and shape and isinstance(
                shape[0], akg.tvm.expr.Var):
            inputs.append(
                akg.tvm.placeholder(shape, dtype, "input_%d" % (i + 1)))
            for tmp_shape in shape:
                if isinstance(tmp_shape, akg.tvm.expr.Var):
                    shape_params.append(tmp_shape)
        elif isinstance(shape, akg.tvm.tensor.Tensor):
            # an already built tensor is used as the input directly
            inputs.append(shape)
            for tmp_shape in shape.shape:
                shape_params.append(tmp_shape)
        else:
            inputs.append(
                akg.tvm.placeholder(shape, dtype, "input_%d" % (i + 1)))
    attrs_params = []
    if op_attrs is not None:
        # list() so that a tuple of op_attrs can be concatenated with the input list
        args = inputs + list(op_attrs)
        for tmp_attr in op_attrs:
            if isinstance(tmp_attr, (list, tuple)) and tmp_attr and isinstance(
                    tmp_attr[0], akg.tvm.expr.Var):
                for attr_param in tmp_attr:
                    if isinstance(attr_param, akg.tvm.expr.Var):
                        attrs_params.append(attr_param)
            elif isinstance(tmp_attr, akg.tvm.expr.Var):
                attrs_params.append(tmp_attr)
    else:
        args = inputs

    # backup inputs because the tensor names may be updated inside op_func
    inputs_backup = recursive_copy(inputs)

    output = op_func(*args)

    # restore inputs to make sure that tensor names are not changed by op_func
    inputs = inputs_backup

    if attrs is None or 'dim' not in attrs or not attrs['dim']:
        dim_info = ""
        if attrs is None:
            attrs = dict()

        if op_func.__name__ in ct_util.set_dim_func_map:
            value = ct_util.set_dim_func_map[op_func.__name__]
            if inspect.isfunction(value):
                dim_info = value(*args)
            elif isinstance(value, dict):
                key = []
                key.append(ft_util.convert_to_list(input_shapes))
                key.append(ft_util.convert_to_list(input_types))
                if op_attrs is not None:
                    key.append(op_attrs)
                key = str(tuple(key))

                if key in value:
                    dim_info = ct_util.set_dims(value[key])
            else:
                raise RuntimeError(
                    "Registered set_dim_map is invalid. Must be a function or a dict!"
                )
        if isinstance(dim_info, (list, tuple)):
            dim_info = dim_info[0]

        attrs['dim'] = dim_info

    compute_func = None  # func which is defined in dsl for doing compute_inline or other
    sch_tmpl = None
    if isinstance(output, (list, tuple)):
        new_outputs = []
        for elem in output:
            if inspect.isfunction(elem):
                compute_func = elem
            elif isinstance(elem, dict):
                # attrs given by the caller take precedence over the ones returned by op_func
                for key, value in elem.items():
                    if key not in attrs or not attrs[key]:
                        attrs[key] = value
            elif isinstance(elem, (list, tuple)):
                new_outputs += elem
            else:
                new_outputs.append(elem)

        output = new_outputs
    elif isinstance(output, dict):
        # a dict result is treated as a schedule template
        sch_tmpl = output
        output = sch_tmpl['output']
    binds = None if not attrs else attrs.pop(BINDS, None)

    op_var = []
    for xx in inputs:
        if isinstance(xx, list):
            op_var.extend(xx)
        else:
            op_var.append(xx)
    # shape vars without duplicates: the ones from op_attrs first, then the ones from the input shapes
    shape_var = []
    for param in attrs_params + shape_params:
        if param not in shape_var:
            shape_var.append(param)
    if isinstance(output, (list, tuple)):
        op_var = op_var + [i for i in output if TensorUtils.is_output_value(i)]
    else:
        if TensorUtils.is_output_value(output):
            op_var = op_var + [output]

    if sch_tmpl is not None:
        # explicit check instead of assert, which is skipped when running with -O
        if sch_tmpl['target'] != 'cuda':
            raise ValueError(
                "Only support cuda as target when using schedule template.")
        if kernel_name == "":
            kernel_name = sch_tmpl['op_name']
        with akg.tvm.target.cuda() as target:
            s = sch_tmpl['schedule'](sch_tmpl['output'])
            with akg.tvm.build_config(dump_pass_ir=True):
                mod = akg.tvm.build(s,
                                    op_var,
                                    target,
                                    target_host='stackvm',
                                    name=kernel_name)
                dump_cuda_meta.dump(mod, kernel_name, s, op_var)
                return mod

    if isinstance(output, (list, tuple)):
        tmp = []
        for x in output:
            if isinstance(x, tuple):
                tmp.append(x[0].op)
            else:
                tmp.append(x.op)
        s = akg.tvm.create_schedule(tmp)
    else:
        s = akg.tvm.create_schedule(output.op)
    if compute_func is not None:
        # the schedule is customized by the dsl, so polyhedral is switched off
        compute_func(s)
        polyhedral = False
    if kernel_name == "":
        kernel_name = op_func.__name__
    mode = get_runtime_mode()
    level = attrs.get("help_tiling")
    if tuning or (level is not None and level > help_tiling_level['None']):
        if op_func.__name__ in ct_util.set_dim_func_map:
            func_ = ct_util.set_dim_func_map[op_func.__name__]
            if inspect.isfunction(func_):
                set_dim_key = func_(*args)[1]
        elif op_func.__name__ in ct_util.gen_key_func_map:
            func_ = ct_util.gen_key_func_map[op_func.__name__]
            if inspect.isfunction(func_):
                set_dim_key = func_(*args)
        with akg.build_config(add_lower_pass=cce.debug_mode(0),
                              dump_pass_ir=True):
            spaces = akg.lower(s,
                               op_var,
                               name=kernel_name,
                               attrs=attrs,
                               polyhedral=polyhedral,
                               tuning=tuning)
            if set_dim_key == "":
                set_dim_key = str(args)
            return spaces, set_dim_key

    if mode == "cpu":
        mod = akg.tvm.build(s, op_var, "llvm")
        if not os.path.isdir("./cpu/ir/"):
            os.makedirs("./cpu/ir/")
        with os.fdopen(
                os.open("./cpu/ir/" + kernel_name + ".cc",
                        os.O_WRONLY | os.O_CREAT, 0o400), 'w') as irf:
            irf.write(akg.tvm.lower(s, op_var, shape_var, simple_mode=True))
        return mod
    with akg.build_config(add_lower_pass=cce.debug_mode(0),
                          dump_pass_ir=dump_ir):
        mod = akg.build(s,
                        op_var,
                        "cce",
                        shape_var,
                        name=kernel_name,
                        attrs=attrs,
                        polyhedral=polyhedral,
                        binds=binds)
        if mod is None:
            return None
        source_code = mod.imported_modules[0].get_source()
    if log_cce:
        logging.debug("#################cce code####################")
        logging.debug(source_code)
    if dump_cce:
        cce_path = "./"
        create_cce(kernel_name, cce_path, source_code)

    return mod
Esempio n. 3
0
def op_build(op_func,
             input_shapes,
             input_types,
             op_attrs=None,
             kernel_name="",
             attrs=None,
             log_cce=False,
             dump_ir=True,
             dump_code=True,
             polyhedral=True,
             tuning=False):
    """
    Return module built from op_func with given inputs.

    Args:
        op_func (function returning an op or (op, [op_vars])): The op build function.
        input_shapes(iterable of iterable of int): the dim sizes for input for op.
        input_types (iterable of iterable of str): the dtypes for each input.
        op_attrs (list or tuple): extra attributes for the op.
        kernel_name (str): name of op.
        attrs (dict): tiling parameter. The dict is updated in place: 'dim' may be
            filled in, attrs returned by op_func are merged and the BINDS entry is removed.
        log_cce (bool): False by default.
        dump_ir (bool): True by default.
        dump_code (bool): True by default.
        polyhedral (bool): True by default.
        tuning (bool): False by default.

    Return:
        module. Without a schedule template, when tuning is enabled or attrs["help_tiling"]
        is above the 'None' level, the tuple (spaces, set_dim_key) is returned instead.
        None is returned when akg.build produces no module.

    Raises:
        RuntimeError: if the entry registered in set_dim_func_map is neither a function nor a dict.
        ValueError: if op_func returns a schedule template whose target is not cuda.
    """
    # declared up front; only assigned when a schedule template is built
    global kc_air_mode

    inputs = []
    set_dim_key = ""
    shape_params = []  # save all the shape params for dynamic_shape cases
    for i, (shape, dtype) in enumerate(zip(input_shapes, input_types)):
        if isinstance(shape, (list, tuple)) and shape and isinstance(
                shape[0], (list, tuple)):
            # a list of shapes describes one list-typed input
            tmp_input = []
            for j, tmp_shape in enumerate(shape):
                tmp_input.append(
                    akg.tvm.placeholder(tmp_shape, dtype,
                                        "input_%d_%d" % (i + 1, j + 1)))
                for tmp in tmp_shape:
                    if isinstance(tmp, akg.tvm.expr.Var):
                        shape_params.append(tmp)
            inputs.append(tmp_input)
        elif isinstance(shape, (list, tuple)) and shape and isinstance(
                shape[0], akg.tvm.expr.Var):
            inputs.append(
                akg.tvm.placeholder(shape, dtype, "input_%d" % (i + 1)))
            for tmp_shape in shape:
                if isinstance(tmp_shape, akg.tvm.expr.Var):
                    shape_params.append(tmp_shape)
        elif isinstance(shape, akg.tvm.tensor.Tensor):
            # an already built tensor is used as the input directly
            inputs.append(shape)
            for tmp_shape in shape.shape:
                shape_params.append(tmp_shape)
        else:
            inputs.append(
                akg.tvm.placeholder(shape, dtype, "input_%d" % (i + 1)))
    attrs_params = []
    if op_attrs is not None:
        # list() so that a tuple of op_attrs can be concatenated with the input list
        args = inputs + list(op_attrs)
        for tmp_attr in op_attrs:
            if isinstance(tmp_attr, (list, tuple)) and tmp_attr and isinstance(
                    tmp_attr[0], akg.tvm.expr.Var):
                for attr_param in tmp_attr:
                    if isinstance(attr_param, akg.tvm.expr.Var):
                        attrs_params.append(attr_param)
            elif isinstance(tmp_attr, akg.tvm.expr.Var):
                attrs_params.append(tmp_attr)
    else:
        args = inputs

    # backup inputs because the tensor names may be updated inside op_func
    inputs_backup = recursive_copy(inputs)

    output = op_func(*args)

    # restore inputs to make sure that tensor names are not changed by op_func
    inputs = inputs_backup

    if attrs is None or 'dim' not in attrs or not attrs['dim']:
        dim_info = ""
        if attrs is None:
            attrs = dict()

        if op_func.__name__ in ct_util.set_dim_func_map:
            value = ct_util.set_dim_func_map[op_func.__name__]
            if inspect.isfunction(value):
                dim_info = value(*args)
            elif isinstance(value, dict):
                key = []
                key.append(ft_util.convert_to_list(input_shapes))
                key.append(ft_util.convert_to_list(input_types))
                if op_attrs is not None:
                    key.append(op_attrs)
                key = str(tuple(key))

                if key in value:
                    dim_info = ct_util.set_dims(value[key])
            else:
                raise RuntimeError(
                    "Registered set_dim_map is invalid. Must be a function or a dict!"
                )
        if isinstance(dim_info, (list, tuple)):
            dim_info = dim_info[0]

        attrs['dim'] = dim_info

    compute_func = None  # func which is defined in dsl for doing compute_inline or other
    sch_tmpl = None
    gpu_binds = None
    if isinstance(output, (list, tuple)):
        new_outputs = []
        for elem in output:
            if inspect.isfunction(elem):
                compute_func = elem
            elif isinstance(elem, dict):
                # attrs given by the caller take precedence over the ones returned by op_func
                for key, value in elem.items():
                    if key not in attrs or not attrs[key]:
                        attrs[key] = value
            elif isinstance(elem, (list, tuple)):
                new_outputs += elem
            else:
                new_outputs.append(elem)

        output = new_outputs
    elif isinstance(output, dict):
        # a dict result is treated as a schedule template
        sch_tmpl = output
        output = sch_tmpl['output']
        gpu_binds = sch_tmpl['binds']
    binds = None if not attrs else attrs.pop(BINDS, None)

    op_var = []
    for xx in inputs:
        if isinstance(xx, list):
            op_var.extend(xx)
        else:
            op_var.append(xx)
    # shape vars without duplicates: the ones from op_attrs first, then the ones from the input shapes
    shape_var = []
    for param in attrs_params + shape_params:
        if param not in shape_var:
            shape_var.append(param)
    if isinstance(output, (list, tuple)):
        op_var = op_var + [i for i in output if TensorUtils.is_output_value(i)]
    else:
        if TensorUtils.is_output_value(output):
            op_var = op_var + [output]

    # NOTE(review): the default kernel name is only filled in for schedule templates and
    # for the cuda target; for other targets an empty kernel_name is passed through
    # unchanged - confirm this is intended.
    if kernel_name == "":
        if sch_tmpl is not None:
            kernel_name = sch_tmpl['op_name']
        elif attrs and attrs.get("target", "cce") == "cuda":
            kernel_name = op_func.__name__

    if sch_tmpl is not None:
        if sch_tmpl['target'] != CUDA:
            raise ValueError(
                "Only support cuda as target when using schedule template.")
        kc_air_mode = "CUDA"
        with akg.tvm.target.cuda() as target:
            if not tuning:
                s = sch_tmpl['schedule'](sch_tmpl['output'])
                with akg.tvm.build_config(dump_pass_ir=dump_ir):
                    mod = akg.build(s,
                                    op_var,
                                    "cuda",
                                    shape_var,
                                    name=kernel_name,
                                    attrs=attrs,
                                    polyhedral=False,
                                    binds=gpu_binds)
            else:

                @autotvm.template
                def _autotune_template():
                    s = sch_tmpl['schedule'](sch_tmpl['output'])
                    return (s, op_var)

                # create autotune task
                task = autotvm.task.create(_autotune_template,
                                           args=list(),
                                           target='cuda')

                print("task config: ", task.config_space)

                # set measure_option
                measure_option = autotvm.measure_option(
                    builder=autotvm.LocalBuilder(),
                    runner=autotvm.LocalRunner(repeat=5,
                                               min_repeat_ms=150,
                                               timeout=4))

                # Begin tuning, log records to file `kernel_name.log`;
                # an existing log file is reused instead of tuning again
                tuner = autotvm.tuner.RandomTuner(task)
                if not os.path.exists(kernel_name + '.log'):
                    tuner.tune(n_trial=len(task.config_space),
                               measure_option=measure_option,
                               callbacks=[
                                   autotvm.callback.log_to_file(kernel_name +
                                                                '.log')
                               ])

                # query best config
                dispatch_context = autotvm.apply_history_best(kernel_name +
                                                              '.log')
                best_config = dispatch_context.query(task.target,
                                                     task.workload)
                print("\nBest config is:")
                print(best_config)

                # apply best config
                with autotvm.apply_history_best(kernel_name + '.log'):
                    s, op_var = _autotune_template()
                    mod = akg.build(s,
                                    op_var,
                                    "cuda",
                                    shape_var,
                                    name=kernel_name,
                                    attrs=attrs,
                                    polyhedral=False,
                                    binds=gpu_binds)

            if dump_code:
                source_code = mod.imported_modules[0].get_source()
                create_code(kernel_name, "./", source_code, CUDA)
            return mod

    if isinstance(output, (list, tuple)):
        tmp = []
        for x in output:
            if isinstance(x, tuple):
                tmp.append(x[0].op)
            else:
                tmp.append(x.op)
        s = akg.tvm.create_schedule(tmp)
    else:
        s = akg.tvm.create_schedule(output.op)
    if compute_func is not None:
        # the schedule is customized by the dsl, so polyhedral is switched off
        compute_func(s)
        polyhedral = False
    mode = get_runtime_mode()
    level = attrs.get("help_tiling")
    if tuning or (level is not None and level > help_tiling_level['None']):
        if op_func.__name__ in ct_util.set_dim_func_map:
            func_ = ct_util.set_dim_func_map[op_func.__name__]
            if inspect.isfunction(func_):
                set_dim_key = func_(*args)[1]
        elif op_func.__name__ in ct_util.gen_key_func_map:
            func_ = ct_util.gen_key_func_map[op_func.__name__]
            if inspect.isfunction(func_):
                set_dim_key = func_(*args)
        with akg.build_config(dump_pass_ir=True):
            spaces = akg.lower(s,
                               op_var,
                               name=kernel_name,
                               attrs=attrs,
                               polyhedral=polyhedral,
                               tuning=tuning)
            if set_dim_key == "":
                set_dim_key = str(args)
            return spaces, set_dim_key

    if mode == "cpu":
        mod = akg.tvm.build(s, op_var, "llvm")
        if not os.path.isdir("./cpu/ir/"):
            os.makedirs("./cpu/ir/")
        with os.fdopen(
                os.open("./cpu/ir/" + kernel_name + ".cc",
                        os.O_WRONLY | os.O_CREAT, 0o400), 'w') as irf:
            irf.write(akg.tvm.lower(s, op_var, shape_var, simple_mode=True))
        return mod
    target = CUDA if attrs and attrs.get("target", "cce") == CUDA else CCE
    with akg.build_config(dump_pass_ir=dump_ir):
        mod = akg.build(s,
                        op_var,
                        target,
                        shape_var,
                        name=kernel_name,
                        attrs=attrs,
                        polyhedral=polyhedral,
                        binds=binds)
    if mod is None:
        return None
    source_code = mod.imported_modules[0].get_source()
    if log_cce:
        logging.debug("#################cce code####################")
        logging.debug(source_code)
    if dump_code:
        create_code(kernel_name, "./", source_code, target)
    return mod
Esempio n. 4
0
def _compilewithjson_to_module_op(kernel_info, attrs, processor):
    """
    compile with json for single op.

    Args:
        kernel_info (dict): kernel description; the keys 'name', 'op', 'input_desc' and
            'attr' are read, 'impl_path' is optional.
        attrs (dict): build attrs; attrs["target"] is read, and attrs returned by the op
            function are merged into it in place.
        processor (str): when 'cuda', the build is delegated to kernel_exec.op_build.

    Returns:
        bool, True if the op was compiled, False otherwise.
    """

    def _get_op_func(op_name):
        """Look up the op function: custom implementation first, then the built-in ops."""
        op_func = None
        # get custom ops implementation first.
        if 'impl_path' in kernel_info and kernel_info['impl_path'] is not None:
            impl_path = os.path.realpath(kernel_info['impl_path'])
            if os.path.isfile(impl_path):
                # NOTE: the file at impl_path is executed on import, so kernel_info
                # must come from a trusted source.
                custom_mod_name = Path(impl_path).resolve().stem
                mod_spec = importlib.util.spec_from_file_location(
                    custom_mod_name, impl_path)
                # spec_from_file_location returns None when no loader matches the
                # file suffix; fall back to the built-in ops instead of crashing.
                if mod_spec is None or mod_spec.loader is None:
                    logging.warning(
                        "can not load custom op implementation from %s", impl_path)
                else:
                    custom_mod = importlib.util.module_from_spec(mod_spec)
                    mod_spec.loader.exec_module(custom_mod)
                    op_func = getattr(custom_mod, op_name, None)

        # get built-in ops.
        if op_func is None:
            op_func = get_op(op_name, attrs["target"])
        return op_func

    def _compilewithjson_cuda(op_func):
        """Build the op through kernel_exec.op_build; only the first desc of each input is used."""
        input_shapes = []
        input_types = []
        for input_desc in kernel_info['input_desc']:
            input_shapes.append(input_desc[0]['shape'])
            input_types.append(input_desc[0]['data_type'])
        op_attrs = [ext_arg['value'] for ext_arg in kernel_info['attr'] or []]
        dump_ir = os.getenv(get_dump_ir_flag()) == "on"
        dump_code = os.getenv(get_dump_code_flag()) == "on"
        kernel_exec.op_build(op_func, input_shapes, input_types, op_attrs, kernel_info['op'], attrs=attrs,
                             dump_ir=dump_ir, dump_code=dump_code)
        return True

    def _update_attrs(elem):
        """Merge elem into attrs without overriding values that are already set."""
        for key, value in elem.items():
            if key not in attrs or not attrs[key]:
                attrs[key] = value

    def _parse_output(output):
        """Split the result of the op function into the schedule function and the output list."""
        schedule_func = None
        if isinstance(output, (list, tuple)):
            from inspect import isfunction
            tmp_outputs = []
            for elem in output:
                if isfunction(elem):
                    schedule_func = elem
                elif isinstance(elem, dict):
                    _update_attrs(elem)
                else:
                    tmp_outputs.append(elem)
            output = tmp_outputs
        else:
            output = [output]
        return schedule_func, output

    def _create_placeholder(desc):
        """Create the placeholder for one tensor desc; an empty shape is replaced by (1,)."""
        tensor_shape = desc['shape'] if desc['shape'] else (1,)
        utils.shape_dtype_max_size_check(tensor_shape, desc['data_type'])
        return akg.tvm.placeholder(
            shape=tensor_shape, name=desc['tensor_name'], dtype=desc['data_type'])

    op_name = kernel_info['name']
    op_func = _get_op_func(op_name)
    if op_func is None:
        logging.error(
            "this op not support by akg, please check op name %s", str(op_name))
        return False
    if processor == 'cuda':
        return _compilewithjson_cuda(op_func)

    args = {}
    tsr = []
    for input_desc in kernel_info['input_desc']:
        if len(input_desc) == 1:
            # a single desc is passed to the op as one tensor
            tensor = _create_placeholder(input_desc[0])
            args[input_desc[0]['name']] = tensor
            tsr.append(tensor)
        else:
            # several descs are passed to the op as a list of tensors under the first name
            tensors = [_create_placeholder(tmp_desc) for tmp_desc in input_desc]
            args[input_desc[0]['name']] = tensors
            tsr.extend(tensors)

    for ext_arg in kernel_info['attr'] or []:
        args[ext_arg['name']] = ext_arg['value']

    output = op_func(**args, target=attrs["target"])
    schedule_func, output = _parse_output(output)

    tsr = tsr + [i for i in output if TensorUtils.is_output_value(i)]
    build_res = op_build([op_name], output, tsr, schedule_func, processor, kernel_info['op'], attrs)
    return bool(build_res)
Esempio n. 5
0
def compilewithjson(json_str):
    """
    compile with json.

    Args:
        json_str (str): json description of the kernel; the keys 'name', 'op',
            'input_desc' and 'attr' are read, 'process' (default 'aicore') and
            'impl_path' are optional.

    Returns:
        False if the json can not be decoded or no op function is found,
        otherwise the result of op_build.
    """
    try:
        kernel_info = json.loads(json_str)
    except jd.JSONDecodeError:
        logging.error(traceback.format_exc())
        return False

    op_name = kernel_info['name']
    op_func = None
    processor = kernel_info.get('process', 'aicore')
    # get custom ops implementation first.
    if kernel_info.get('impl_path') is not None:
        impl_path = os.path.realpath(kernel_info['impl_path'])
        if os.path.isfile(impl_path):
            # NOTE: the file at impl_path is executed on import, so json_str
            # must come from a trusted source.
            custom_mod_name = Path(impl_path).resolve().stem
            mod_spec = importlib.util.spec_from_file_location(
                custom_mod_name, impl_path)
            # spec_from_file_location returns None when no loader matches the
            # file suffix; treat it as "no custom op" instead of crashing.
            if mod_spec is None or mod_spec.loader is None:
                logging.warning(
                    "can not load custom op implementation from %s", impl_path)
            else:
                custom_mod = importlib.util.module_from_spec(mod_spec)
                mod_spec.loader.exec_module(custom_mod)
                op_func = getattr(custom_mod, op_name, None)

    # get built-in ops.
    if op_func is None and processor == 'cuda':
        op_func = getattr(gpu, op_name, None)

    if op_func is None:
        logging.error("this op not supported, please check op name %s",
                      str(op_name))
        return False

    def _create_placeholder(desc):
        """Create the placeholder for one tensor desc; an empty shape is replaced by (1, )."""
        tensor_shape = desc['shape'] if desc['shape'] else (1, )
        vc_util.shape_dtype_max_size_check(tensor_shape)
        return akg.tvm.placeholder(shape=tensor_shape,
                                   name=desc['tensor_name'],
                                   dtype=desc['data_type'])

    args = {}
    tsr = []
    for input_desc in kernel_info['input_desc']:
        if len(input_desc) == 1:
            # a single desc is passed to the op as one tensor
            tensor = _create_placeholder(input_desc[0])
            args[input_desc[0]['name']] = tensor
            tsr.append(tensor)
        else:
            # several descs are passed to the op as a list of tensors under the first name
            tensors = [_create_placeholder(tmp_desc) for tmp_desc in input_desc]
            args[input_desc[0]['name']] = tensors
            tsr.extend(tensors)

    for ext_arg in kernel_info['attr'] or []:
        args[ext_arg['name']] = ext_arg['value']

    output = op_func(**args)

    schedule_func = None
    attrs = {}
    if isinstance(output, (list, tuple)):
        from inspect import isfunction
        tmp_outputs = []
        for elem in output:
            if isfunction(elem):
                schedule_func = elem
            elif isinstance(elem, dict):
                # the first non-empty value seen for a key wins
                for key, value in elem.items():
                    if key not in attrs or not attrs[key]:
                        attrs[key] = value
            else:
                tmp_outputs.append(elem)

        output = tmp_outputs
    else:
        output = [output]

    tsr = tsr + [i for i in output if TensorUtils.is_output_value(i)]
    return op_build([op_name], output, tsr, schedule_func, processor,
                    kernel_info['op'], attrs)