Beispiel #1
0
 def set_op_node_name(self, node: pm.Node, operand: OperandTemplate):
     """Bind ``operand`` to the name of ``node``.

     The name is applied through ``operand.set_node_name``. Re-applying the
     name the operand already carries is allowed.

     Raises:
         RuntimeError: If ``operand.node_name`` is already set (not ``None``)
             to a value different from ``node.name``.
     """
     bound_name = operand.node_name
     if bound_name is not None and bound_name != node.name:
         conflict_report = (
             "Name already set to different value for operand:\n"
             f"Previous name: {bound_name}\n"
             f"New name: {node.name}")
         raise RuntimeError(conflict_report)
     operand.set_node_name(node.name)
Beispiel #2
0
    def set_dim_values(self, node: pm.Node, operand: OperandTemplate):
        """Resolve the symbolic dimensions of ``operand`` from ``node.shape``.

        Nothing happens when ``operand.is_instantiated()`` is already true.
        Otherwise, for every axis ``j`` of ``node.shape`` the symbol
        ``operand.shape_list[j]`` receives the size of a node axis. That axis
        is ``operand.permutation[j]`` when the permutation has the same length
        as the node shape, and ``j`` itself otherwise. Each size is recorded
        on the operand and mirrored into ``self.required_params``.

        Raises:
            RuntimeError: If a symbol is already set in
                ``self.required_params`` to a different size, or if the
                operand ends up with a different number of shape symbols
                than entries in ``operand.shape_list``.
        """
        if operand.is_instantiated():
            return

        rank = len(node.shape)
        if len(operand.permutation) == rank:
            source_axes = [operand.permutation[axis] for axis in range(rank)]
        else:
            # Permutation does not match the node rank: use identity order.
            source_axes = list(range(rank))

        for axis in range(rank):
            key = operand.shape_list[axis]
            dim_size = node.shape[source_axes[axis]]
            operand.update_shape_symbols(key, dim_size)

            if key not in self.required_params:
                self.add_required_param(key, dim_size)
                continue

            if not self.required_params[key].is_set():
                self.set_required_param(key, dim_size)
            elif self.required_params[key].value != dim_size:
                # The same symbol was previously bound to another size.
                raise RuntimeError(
                    f"Inconsistent dimension sizes for operation {self.op_name}{self.instance_id}\n"
                    f"Key: {key}\n"
                    f"Size: {self.required_params[key].value}\n"
                    f"Node shape: {node.shape}\n"
                    f"Node name: {operand.name}\n"
                    f"Shape list: {operand.shape_list}")

        recorded_symbols = list(operand.shape_symbols.keys())
        if len(operand.shape_list) != len(recorded_symbols):
            raise RuntimeError(
                f"All shape values were not set for node {node.name}, operand {operand.name}:\n"
                f"Node shape: {node.shape}\n"
                f"Operand shape variables: {operand.shape_list}")
def global_avg_pool(hag: ArchitectureNode):
    """Build and return the ``global_avg_pool`` codelet for ``hag``.

    The codelet has one input operand ``data`` with dimensions
    (N, C, IH, IW) and one output operand ``out`` with dimensions
    (N, C, OH, OW), both using ``OP_DTYPES[2]``. Inside loops over
    N, C, IH, IW, OH and OW it records a DRAM -> VMEM1 transfer of the
    input element, a SIMD compute, and a VMEM1 -> DRAM transfer of the
    output element.
    """
    in_dims = ["N", "C", "IH", "IW"]
    out_dims = ["N", "C", "OH", "OW"]
    data = OperandTemplate("data", OP_DTYPES, in_dims, dtype=OP_DTYPES[2])
    out = OperandTemplate("out", OP_DTYPES, out_dims, dtype=OP_DTYPES[2])

    # TODO: Add option to create operand
    with Codelet("global_avg_pool", [data], [out], hag) as cdlt:
        cdlt.configure("start", "SIMD")
        cdlt.configure("start", "IMM", immediate_value=0)

        # Multi-item `with` enters the loops in the listed order, exactly
        # like nesting them one inside the other.
        with Loop(0, "N") as n, Loop(0, "C") as c:
            with Loop(0, "IH") as iy, Loop(0, "IW") as ix:
                with Loop(0, "OH") as oy, Loop(0, "OW") as ox:
                    cdlt.transfer(data[n, c, iy + oy, ix + ox],
                                  ["DRAM", "VMEM1"])
                    # NOTE(review): the op is "MAX" although the codelet is
                    # named global_avg_pool -- confirm this is intended.
                    cdlt.compute("MAX", [data, data], [out], target="SIMD")
                    cdlt.transfer(out[n, c, oy, ox], ["VMEM1", "DRAM"])
    return cdlt
def maxpool2d(hag: ArchitectureNode):
    """Build and return the ``max_pool`` codelet for ``hag``.

    The input operand ``data`` has dimensions (N, C, IH, IW) and the output
    operand ``out`` has dimensions (N, C, OH, OW), both using
    ``OP_DTYPES[2]``. Inside loops over N, C, KH, KW, OH and OW the codelet
    records a DRAM -> VMEM1 transfer of ``data`` indexed at
    ``(oy * "sy" + kh, ox * "sx" + kw)``, a SIMD "MAX" compute, and a
    VMEM1 -> DRAM transfer of the output element.
    """
    in_dims = ["N", "C", "IH", "IW"]
    out_dims = ["N", "C", "OH", "OW"]
    data = OperandTemplate("data", OP_DTYPES, in_dims, dtype=OP_DTYPES[2])
    out = OperandTemplate("out", OP_DTYPES, out_dims, dtype=OP_DTYPES[2])

    # TODO: Add option to create operand
    with Codelet("max_pool", [data], [out], hag) as cdlt:
        cdlt.configure("start", "SIMD")
        cdlt.configure("start", "IMM", immediate_value=0)

        # Multi-item `with` enters the loops in the listed order, exactly
        # like nesting them one inside the other.
        with Loop(0, "N") as n, Loop(0, "C") as c:
            with Loop(0, "KH") as kh, Loop(0, "KW") as kw:
                with Loop(0, "OH") as oy, Loop(0, "OW") as ox:
                    cdlt.transfer(data[n, c, oy * "sy" + kh, ox * "sx" + kw],
                                  ["DRAM", "VMEM1"])
                    cdlt.compute("MAX", [data, data], [out], target="SIMD")
                    cdlt.transfer(out[n, c, oy, ox], ["VMEM1", "DRAM"])
    return cdlt
Beispiel #5
0
def conv2d_bias(hag: ArchitectureNode):
    """Build and return the ``conv_bias`` codelet for ``hag``.

    Operands: ``data`` (N, IC, IH, IW) and ``weight`` (OC, IC, KH, KW) use
    ``OP_DTYPES[0]``; ``bias`` (OC) and ``out`` (N, OC, OH, OW) use
    ``OP_DTYPES[2]``. The codelet is created with an empty
    ``required_params`` dict.

    Inside loops over OC, N, IC, KH, KW, OH and OW it records DRAM loads of
    weight, bias, data and out into WBUF, BBUF, IBUF and OBUF, an "MVMUL"
    compute targeting ``pe_array``, and an OBUF -> DRAM transfer of ``out``.
    """
    # TODO: Need to figure out how to change the memory layout
    data = OperandTemplate(
        "data", OP_DTYPES, ["N", "IC", "IH", "IW"], dtype=OP_DTYPES[0])
    weight = OperandTemplate(
        "weight", OP_DTYPES, ["OC", "IC", "KH", "KW"], dtype=OP_DTYPES[0])
    bias = OperandTemplate("bias", OP_DTYPES, ["OC"], dtype=OP_DTYPES[2])
    out = OperandTemplate(
        "out", OP_DTYPES, ["N", "OC", "OH", "OW"], dtype=OP_DTYPES[2])

    with Codelet("conv_bias", [data, weight, bias], [out], hag,
                 required_params={}) as cdlt:

        for unit in ("systolic_array", "WBUF", "BBUF", "IBUF", "OBUF"):
            cdlt.configure("start", unit)

        # Multi-item `with` enters the loops in the listed order, exactly
        # like nesting them one inside the other.
        with Loop(0, "OC") as oc, Loop(0, "N") as n, Loop(0, "IC") as ic:
            with Loop(0, "KH") as kh, Loop(0, "KW") as kw:
                with Loop(0, "OH") as oy, Loop(0, "OW") as ox:
                    cdlt.transfer(weight[oc, ic, kh, kw], ["DRAM", "WBUF"])
                    cdlt.transfer(bias[oc], ["DRAM", "BBUF"])
                    cdlt.transfer(
                        data[n, ic, oy * "stride" + kh, ox * "stride" + kw],
                        ["DRAM", "IBUF"])
                    cdlt.transfer(out[n, oc, oy, ox], ["DRAM", "OBUF"])
                    # A variant of this compute that passes indexed operands
                    # (data[...], weight[...], bias[oc] -> out[n, oc, y, x])
                    # was left disabled in the original source.
                    cdlt.compute("MVMUL", [data, weight, bias], [out],
                                 target="pe_array")
                    cdlt.transfer(out[n, oc, oy, ox], ["OBUF", "DRAM"])

        # TODO: Add store off chip
        for unit in ("WBUF", "BBUF", "IBUF", "OBUF", "systolic_array"):
            cdlt.configure("end", unit)
    return cdlt
Beispiel #6
0
def relu(hag: ArchitectureNode):
    """Build and return the ``relu`` codelet for ``hag``.

    Input ``op1`` and output ``out`` both have dimensions (N, C, H, W) and
    use ``OP_DTYPES[2]``. Inside loops over N, C, H and W the codelet
    records a DRAM -> VMEM1 transfer of the input element, a SIMD "RELU"
    compute, and a VMEM1 -> DRAM transfer of the output element.
    """
    op1 = OperandTemplate(
        "op1", OP_DTYPES, ["N", "C", "H", "W"], dtype=OP_DTYPES[2])
    out = OperandTemplate(
        "out", OP_DTYPES, ["N", "C", "H", "W"], dtype=OP_DTYPES[2])

    with Codelet("relu", [op1], [out], hag) as cdlt:
        cdlt.configure("start", "SIMD")
        # A `cdlt.configure("start", "VMEM")` call is left disabled here.

        # Multi-item `with` enters the loops in the listed order, exactly
        # like nesting them one inside the other.
        with Loop(0, "N") as n, Loop(0, "C") as c:
            with Loop(0, "H") as row, Loop(0, "W") as col:
                cdlt.transfer(op1[n, c, row, col], ["DRAM", "VMEM1"])
                cdlt.compute("RELU", [op1], [out], target="SIMD")
                cdlt.transfer(out[n, c, row, col], ["VMEM1", "DRAM"])
    return cdlt
Beispiel #7
0
def maxpool2d_nchw(hag: ArchitectureNode):
    """Build and return the NCHW ``max_pool`` codelet for ``hag``.

    The input operand ``data`` has dimensions (N, C, IH, IW) and the output
    operand ``out`` has dimensions (N, C, OH, OW); no explicit dtype is
    passed for either.

    Two loop nests are recorded:

    1. Over N, C, OH, OW: load ``data`` at the strided position, compute
       "MAX" of ``data`` against ``"IMM"`` on SIMD, and transfer ``out``.
    2. Over N, C, KH, KW, OH, OW: load ``data`` at the strided position
       offset by the kernel indices, compute "MAX" on SIMD, and transfer
       ``out``.

    Returns:
        The constructed codelet. The function previously fell off the end
        and returned ``None``; it now returns ``cdlt`` like the other
        codelet builders in this file.
    """
    # loop_order = ["n", "oc", "kh", "kh", "ow", "oh"]
    #
    data = OperandTemplate("data", OP_DTYPES, ["N", "C", "IH", "IW"])
    #
    out = OperandTemplate("out", OP_DTYPES, ["N", "C", "OH", "OW"])
    # # TODO: Add option to create operand
    # # 1. set IMM to negative infinity
    with Codelet("max_pool", [data], [out], hag) as cdlt:

        cdlt.configure("start", "SIMD")
        cdlt.configure("start", "VMEM")
        cdlt.configure("start", "IMM")  # 1.
        with Loop(0, "N") as n:
            with Loop(0, "C") as c:
                with Loop(0, "OH") as y:
                    with Loop(0, "OW") as x:
                        cdlt.transfer(data[n, c, y * "stride", x * "stride"],
                                      ["DRAM", "VMEM"])

                        cdlt.compute("MAX", [data, "IMM"], [out],
                                     target="SIMD")
                        cdlt.transfer(out[n, c, y, x], ["SIMD", "VMEM"])

        with Loop(0, "N") as n:
            with Loop(0, "C") as c:
                with Loop(0, "KH") as kh:
                    with Loop(0, "KW") as kw:
                        with Loop(0, "OH") as y:
                            with Loop(0, "OW") as x:
                                cdlt.transfer(
                                    data[n, c, y * "stride" + kh,
                                         x * "stride" + kw], ["DRAM", "VMEM"])
                                # NOTE(review): single-entry path ["VMEM"],
                                # the partial index out[n], and the
                                # pe_array/OBUF path below differ from the
                                # other SIMD codelets -- confirm intent.
                                cdlt.transfer(out[n, c, y, x], ["VMEM"])
                                cdlt.compute("MAX", [data], [out[n]],
                                             target="SIMD")
                                cdlt.transfer(out[n, c, y, x],
                                              ["pe_array", "OBUF", "DRAM"])

        # TODO: Add store off chip
        # NOTE(review): the units ended here (WBUF/IBUF/OBUF) are not the
        # ones started above (SIMD/VMEM/IMM) -- confirm this is intended.
        cdlt.configure("end", "WBUF")
        cdlt.configure("end", "IBUF")
        cdlt.configure("end", "OBUF")
    return cdlt
Beispiel #8
0
def elem_add(hag: ArchitectureNode):
    """Build and return the ``elem_add`` codelet for ``hag``.

    Inputs ``op1`` and ``op2`` and the output (operand name ``add_out``) all
    have dimensions (N, C, H, W) and use ``OP_DTYPES[2]``. Inside loops over
    N, C, H and W the codelet records DRAM -> VMEM1 and DRAM -> VMEM2
    transfers of the two inputs, a SIMD "ADD" compute, and a VMEM1 -> DRAM
    transfer of the output element.
    """
    op1 = OperandTemplate(
        "op1", OP_DTYPES, ["N", "C", "H", "W"], dtype=OP_DTYPES[2])
    op2 = OperandTemplate(
        "op2", OP_DTYPES, ["N", "C", "H", "W"], dtype=OP_DTYPES[2])
    out = OperandTemplate(
        "add_out", OP_DTYPES, ["N", "C", "H", "W"], dtype=OP_DTYPES[2])

    with Codelet("elem_add", [op1, op2], [out], hag) as cdlt:
        cdlt.configure("start", "SIMD")

        # Multi-item `with` enters the loops in the listed order, exactly
        # like nesting them one inside the other.
        with Loop(0, "N") as n, Loop(0, "C") as c:
            with Loop(0, "H") as row, Loop(0, "W") as col:
                cdlt.transfer(op1[n, c, row, col], ["DRAM", "VMEM1"])
                cdlt.transfer(op2[n, c, row, col], ["DRAM", "VMEM2"])
                cdlt.compute("ADD", [op1, op2], [out], target="SIMD")
                cdlt.transfer(out[n, c, row, col], ["VMEM1", "DRAM"])
    return cdlt
Beispiel #9
0
def gemm(hag: ArchitectureNode):
    """Build and return the ``gemm`` codelet for ``hag``.

    Operands: ``data`` (M, N) and ``weight`` (N, P) use ``OP_DTYPES[0]``;
    ``bias`` (P) and ``out`` (M, P) use ``OP_DTYPES[2]``. The codelet is
    created with an empty ``required_params`` dict.

    Inside loops over P, N and M it records DRAM loads of data, weight, bias
    and out into IBUF, WBUF, BBUF and OBUF, an "MVMUL" compute targeting
    ``pe_array``, and an OBUF -> DRAM transfer of ``out``.
    """
    data = OperandTemplate("data", OP_DTYPES, ["M", "N"], dtype=OP_DTYPES[0])
    weight = OperandTemplate(
        "weight", OP_DTYPES, ["N", "P"], dtype=OP_DTYPES[0])
    bias = OperandTemplate("bias", OP_DTYPES, ["P"], dtype=OP_DTYPES[2])
    out = OperandTemplate("out", OP_DTYPES, ["M", "P"], dtype=OP_DTYPES[2])

    with Codelet("gemm", [data, weight, bias], [out], hag,
                 required_params={}) as cdlt:

        for unit in ("systolic_array", "WBUF", "IBUF", "BBUF", "OBUF"):
            cdlt.configure("start", unit)

        # Multi-item `with` enters the loops in the listed order, exactly
        # like nesting them one inside the other.
        with Loop(0, "P") as p, Loop(0, "N") as n, Loop(0, "M") as m:
            cdlt.transfer(data[m, n], ["DRAM", "IBUF"])
            cdlt.transfer(weight[n, p], ["DRAM", "WBUF"])
            cdlt.transfer(bias[p], ["DRAM", "BBUF"])
            cdlt.transfer(out[m, p], ["DRAM", "OBUF"])
            cdlt.compute("MVMUL", [data, weight, bias], [out],
                         target="pe_array")
            cdlt.transfer(out[m, p], ["OBUF", "DRAM"])

        # TODO: Add store off chip
        for unit in ("WBUF", "IBUF", "OBUF", "BBUF", "systolic_array"):
            cdlt.configure("end", unit)
    return cdlt