예제 #1
0
    def forward(self, input: Tensor, hx: Tensor) -> Tensor:
        """Run the FSUMGU cell over a full bitstream and return the next hidden state.

        Args:
            input: Input tensor; its first dimension sizes the default hidden
                state and the output accumulator.
            hx: Previous hidden state. When ``None``, a zero tensor of shape
                ``(input.size()[0], self.hidden_size)`` is used instead.

        Returns:
            The first element of the tuple returned by the output
            ``ProgError`` monitor after ``2 ** self.hwcfg["width"]`` cycles.
        """
        device = input.device
        batch = input.size()[0]
        if hx is None:
            hx = torch.zeros(batch, self.hidden_size, dtype=input.dtype, device=device)

        # A new cell is built on every call; it receives the current hidden
        # state through ``hx_buffer`` and shares the externally held weights.
        rnncell = FSUMGUCell(self.input_size, self.hidden_size, bias=self.bias,
                             weight_ext_f=self.weight_f, bias_ext_f=self.bias_f,
                             weight_ext_n=self.weight_n, bias_ext_n=self.bias_n,
                             hx_buffer=hx,
                             hwcfg=self.hwcfg, swcfg=self.swcfg).to(device)

        # Bitstream generator for the input.
        iSource = BinGen(input, self.hwcfg, self.swcfg)().to(device)
        iRNG = RNG(self.hwcfg, self.swcfg)().to(device)
        iBSG = BSGen(iSource, iRNG, self.swcfg).to(device)

        # Bitstream generator for the hidden state.
        hSource = BinGen(hx, self.hwcfg, self.swcfg)().to(device)
        hRNG = RNG(self.hwcfg, self.swcfg)().to(device)
        hBSG = BSGen(hSource, hRNG, self.swcfg).to(device)

        # Output monitor, constructed around an all-zero tensor of the hidden shape.
        oPE = ProgError(torch.zeros(batch, self.hidden_size, dtype=input.dtype, device=device),
                        self.hwcfg_ope).to(device)

        # The base index tensors do not depend on the cycle, so allocate them
        # once; ``idx + c`` creates a fresh tensor for every cycle.
        idx = torch.zeros(iSource.size(), dtype=torch.long, device=device)
        hdx = torch.zeros(hSource.size(), dtype=torch.long, device=device)

        for c in range(2**self.hwcfg["width"]):
            iBS = iBSG(idx + c)
            hBS = hBSG(hdx + c)

            oBS = rnncell(iBS, hBS)
            oPE.Monitor(oBS)

        hy = oPE()[0]
        return hy
예제 #2
0
def _print_abs_error(label, err):
    """Print min/max, std, mean and RMSE of the error tensor ``err`` on one line."""
    err_min = err.min().item()
    err_max = err.max().item()
    rmse = torch.sqrt(torch.mean(torch.square(err)))
    std, mean = torch.std_mean(err)
    print("{:30s}".format(label) + \
            ", Absolute Error range," + "{:12f}".format(err_min) + ", {:12f}".format(err_max) + \
            ", std," + "{:12f}".format(std) + \
            ", mean," + "{:12f}".format(mean) + \
            ", rmse," + "{:12f}".format(rmse))


def test_fsumgu():
    """Compare FSUMGUCell, HUBMGUCell and HardMGUCellFXP against HardMGUCell.

    For each bit width in [7, 8, 9, 10] a random input window is pushed
    through the four cells. The FSU cell is driven cycle by cycle with
    generated bitstreams and monitored with ``ProgError``; the HUB and FXP
    cells are compared to the reference hidden state directly. Error
    statistics are printed per window; nothing is returned or asserted.
    """
    bitwidth_list = [7, 8, 9, 10]
    for bitwidth in bitwidth_list:
        print("bit width:", bitwidth)
        win_sz = 10
        batch = 32
        input_sz = 256
        hidden_sz = 64

        intwidth = 1

        fracwidth = bitwidth - intwidth
        mode = "bipolar"
        depth = bitwidth + 2
        depth_ismul = bitwidth - 4
        rng = "Sobol"
        bias = False
        output_error_only = True

        hwcfg = {
            "width": bitwidth,
            "mode": mode,
            "depth": depth,
            "depth_ismul": depth_ismul,
            "rng": rng,
            "dimr": 1,
            "scale": 1
        }
        swcfg = {
            "btype": torch.float,
            "rtype": torch.float,
            "stype": torch.float
        }

        # ``inputs`` (not ``input``) so the builtin is not shadowed.
        inputs = torch.randn(win_sz, batch, input_sz).to(device)
        inputs = truncated_normal(inputs, mean=0, std=0.4)
        hx1 = torch.randn(batch, hidden_sz).to(device)
        hx1 = truncated_normal(hx1, mean=0, std=0.1)
        # Every cell starts from the same hidden state.
        hx2 = hx1.clone().detach().to(device)
        hx3 = hx1.clone().detach().to(device)
        hx4 = hx1.clone().detach().to(device)
        output1 = []
        output2 = []
        output3 = []
        output4 = []

        # rnn1 is the reference; rnn3 and rnn4 reuse its weights.
        rnn1 = HardMGUCell(input_sz, hidden_sz, bias=bias,
                           hard=True).to(device)
        rnn3 = HardMGUCellFXP(input_sz,
                              hidden_sz,
                              bias=bias,
                              hard=True,
                              intwidth=intwidth,
                              fracwidth=fracwidth).to(device)
        rnn3.weight_f.data = rnn1.weight_f.clone().detach().to(device)
        rnn3.weight_n.data = rnn1.weight_n.clone().detach().to(device)

        rnn4 = HUBMGUCell(input_sz,
                          hidden_sz,
                          bias=bias,
                          weight_ext_f=rnn1.weight_f,
                          bias_ext_f=rnn1.bias_f,
                          weight_ext_n=rnn1.weight_n,
                          bias_ext_n=rnn1.bias_n,
                          hwcfg=hwcfg).to(device)

        for i in range(win_sz):
            hx1 = rnn1(inputs[i], hx1)
            output1.append(hx1)

            hx3 = rnn3(inputs[i], hx3)
            output3.append(hx3)

            hx4 = rnn4(inputs[i], hx4)
            output4.append(hx4)

            iVec, hVec = inputs[i], hx2

            # rnn2 in the loop to mimic the hw reset
            rnn2 = FSUMGUCell(input_sz,
                              hidden_sz,
                              bias=bias,
                              weight_ext_f=rnn1.weight_f,
                              bias_ext_f=rnn1.bias_f,
                              weight_ext_n=rnn1.weight_n,
                              bias_ext_n=rnn1.bias_n,
                              hx_buffer=hx2,
                              hwcfg=hwcfg,
                              swcfg=swcfg).to(device)

            iSource = BinGen(iVec, hwcfg, swcfg)().to(device)
            iRNG = RNG(hwcfg, swcfg)().to(device)
            iBSG = BSGen(iSource, iRNG, swcfg).to(device)
            iPE = ProgError(iVec, hwcfg).to(device)

            hSource = BinGen(hVec, hwcfg, swcfg)().to(device)
            hRNG = RNG(hwcfg, swcfg)().to(device)
            hBSG = BSGen(hSource, hRNG, swcfg).to(device)
            hPE = ProgError(hVec, hwcfg).to(device)

            oVec = output1[i]
            oPE = ProgError(oVec, hwcfg).to(device)

            # Monitors for the intermediate signals, each built around the
            # reference cell's attribute of the same name.
            fg_ug_in_PE = ProgError(rnn1.fg_ug_in, hwcfg).to(device)
            fg_in_PE = ProgError(rnn1.fg_in, hwcfg).to(device)
            fg_PE = ProgError(rnn1.fg, hwcfg).to(device)
            fg_hx_PE = ProgError(rnn1.fg_hx, hwcfg).to(device)
            ng_ug_in_PE = ProgError(rnn1.ng_ug_in, hwcfg).to(device)
            ng_PE = ProgError(rnn1.ng, hwcfg).to(device)
            fg_ng_PE = ProgError(rnn1.fg_ng, hwcfg).to(device)
            fg_ng_inv_PE = ProgError(rnn1.fg_ng_inv, hwcfg).to(device)

            # Cycle-invariant base indices: allocate once per window;
            # ``idx + c`` yields a fresh tensor each cycle.
            idx = torch.zeros(iSource.size(), dtype=torch.long, device=device)
            hdx = torch.zeros(hSource.size(), dtype=torch.long, device=device)

            for c in range(2**bitwidth):
                iBS = iBSG(idx + c)
                iPE.Monitor(iBS)

                hBS = hBSG(hdx + c)
                hPE.Monitor(hBS)

                oBS = rnn2(iBS, hBS)

                fg_ug_in_PE.Monitor(rnn2.fg_ug_in)
                fg_in_PE.Monitor(rnn2.fg_in)
                fg_PE.Monitor(rnn2.fg)
                fg_hx_PE.Monitor(rnn2.fg_hx)
                ng_ug_in_PE.Monitor(rnn2.ng_ug_in)
                ng_PE.Monitor(rnn2.ng)
                fg_ng_PE.Monitor(rnn2.fg_ng)
                fg_ng_inv_PE.Monitor(rnn2.fg_ng_inv)

                oPE.Monitor(oBS)

            # The FSU result of this window feeds the next window.
            hx2 = oPE()[0]
            output2.append(hx2)

            if not output_error_only:
                progerror_report(iPE, "input")
                progerror_report(hPE, "hidden")

                progerror_report(fg_ug_in_PE, "fg_ug_in")
                progerror_report(fg_in_PE, "fg_in")
                progerror_report(fg_PE, "fg")
                progerror_report(fg_hx_PE, "fg_hx")
                progerror_report(ng_ug_in_PE, "ng_ug_in")
                progerror_report(ng_PE, "ng")
                progerror_report(fg_ng_PE, "fg_ng")
                progerror_report(fg_ng_inv_PE, "fg_ng_inv")

            progerror_report(oPE, str(i) + "-th win output fsu")

            _print_abs_error(str(i) + "-th win output hub", hx1 - hx4)
            _print_abs_error(str(i) + "-th win output fxp", hx1 - hx3)

        print()
예제 #3
0
def test_fsuadd():
    """Exercise FSUAdd for bipolar/unipolar data, scaled and non-scaled.

    For each (mode, scale) pair a random input of shape [128, 256, 512] is
    summed along dimension ``hwcfg["dima"]`` by the unary adder over
    ``2 ** width`` cycles. Input and output error ranges, the final RMSE and
    the accumulated adder run time are printed; nothing is asserted.
    """
    hwcfg = {
        "width": 12,
        "mode": "bipolar",
        "dimr": 1,
        "dima": 0,
        "rng": "sobol",
        "scale": 1,
        "depth": 20,
        "entry": None
    }
    swcfg = {"rtype": torch.float, "stype": torch.float, "btype": torch.float}
    bitwidth = hwcfg["width"]
    rng = hwcfg["rng"]

    plot_en = False
    modes = ["bipolar", "unipolar"]
    size = [128, 256, 512]

    scaled = [True, False]

    for mode in modes:
        for scale in scaled:
            run_time = 0
            acc_dim = hwcfg["dima"]
            scale_mod = size[acc_dim]
            result_pe_cycle = []
            hwcfg["mode"] = mode
            hwcfg["scale"] = scale_mod if scale else 1
            uadd = FSUAdd(hwcfg, swcfg).to(device)

            # Random data quantized to multiples of 1 / 2**bitwidth.
            if mode == "unipolar":
                iVec = torch.rand(size).mul(2**bitwidth).round().div(
                    2**bitwidth).to(device)
            elif mode == "bipolar":
                iVec = torch.rand(size).mul(2).sub(1).mul(
                    2**bitwidth).round().div(2**bitwidth).to(device)

            oVec = torch.sum(iVec, acc_dim).to(device)

            iVecSource = BinGen(iVec, hwcfg, swcfg)().to(device)
            iVecRNG = RNG(hwcfg, swcfg)().to(device)
            iVecBS = BSGen(iVecSource, iVecRNG, swcfg).to(device)
            # The input monitor is built with scale 1; the scale is restored
            # afterwards for the output monitor.
            hwcfg["scale"] = 1
            iVecPE = ProgError(iVec, hwcfg).to(device)
            print("iVecPE cfg", iVecPE.hwcfg)
            hwcfg["scale"] = scale_mod if scale else 1
            oVecPE = ProgError(oVec, hwcfg).to(device)
            print("oVecPE cfg", oVecPE.hwcfg)

            with torch.no_grad():
                idx = torch.zeros(iVecSource.size()).type(
                    torch.long).to(device)
                for i in range(2**bitwidth):
                    iBS = iVecBS(idx + i)
                    iVecPE.Monitor(iBS)

                    # Time only the adder call and accumulate across cycles.
                    start_time = time.time()
                    oVecU = uadd(iBS)
                    run_time = time.time() - start_time + run_time

                    if i == 0:
                        print("uadd cfg", uadd.hwcfg)

                    oVecPE.Monitor(oVecU)
                    rmse = torch.sqrt(
                        torch.mean(torch.mul(oVecPE()[1],
                                             oVecPE()[1])))
                    result_pe_cycle.append(1 - rmse.item())
                # Report the accumulated adder time. The original printed
                # ``time.time() - start_time``, which covers only the last
                # cycle and left ``run_time`` unused.
                print("--- %s seconds ---" % run_time)
                print("RNG: " + rng + ", data: " + mode + ", scaled: " +
                      str(scale))
                print("input error:  ", "min: ",
                      torch.min(iVecPE()[1]).item(), "max: ",
                      torch.max(iVecPE()[1]).item())
                print("output error: ", "min: ",
                      torch.min(oVecPE()[1]).item(), "max: ",
                      torch.max(oVecPE()[1]).item(), "RMSE: ", rmse.item())
                print()
                if plot_en is True:
                    result_pe = oVecPE()[1].cpu().numpy()
                    print("error distribution=========>")
                    plt.figure(figsize=(3, 1.5))
                    plt.hist(
                        result_pe.flatten(),
                        bins='auto')  # arguments are passed to np.histogram
                    plt.show()
                    print("progressive accuracy=========>")
                    plt.figure(figsize=(3, 1.5))
                    plt.plot(result_pe_cycle)  # 1 - RMSE per cycle
                    plt.show()
예제 #4
0
def test_bi2uni():
    """Drive Bi2Uni with a bipolar bitstream and report its unipolar output error.

    The input holds 1024 random values in [0, 1] quantized to multiples of
    1 / 2**8. It is encoded with ``hwcfg["mode"] == "bipolar"`` and the
    output is monitored with ``hwcfg["mode"] == "unipolar"``. Results are
    printed; nothing is asserted.
    """
    hwcfg = {
        "width": 8,
        "mode": "bipolar",
        "dimr": 1,
        "rng": "sobol",
        "scale": 1,
        "depth": 3
    }
    swcfg = {"rtype": torch.float, "stype": torch.float, "btype": torch.float}
    bitwidth = hwcfg["width"]

    in_dim = 1024

    uBi2Uni = Bi2Uni(hwcfg, swcfg).to(device)

    iVec = ((torch.rand(in_dim) * (2**bitwidth)).round() /
            (2**bitwidth)).to(device)
    # The reference output is the input itself; the timing of this cast is
    # multiplied by the cycle count before printing.
    start_time = time.time()
    oVec = iVec.type(torch.float)
    print("--- %s seconds ---" % (((time.time() - start_time)) * 2**bitwidth))

    print("input", iVec)
    print("real output", oVec)

    hwcfg["mode"] = "bipolar"
    iVecSource = BinGen(iVec, hwcfg, swcfg)().to(device)

    iVecRNG = RNG(hwcfg, swcfg)().to(device)
    iVecBS = BSGen(iVecSource, iVecRNG, swcfg).to(device)

    # The input monitor is built in bipolar mode, the output monitor in
    # unipolar mode; the shared dict is switched back afterwards.
    iVecPE = ProgError(iVec, hwcfg).to(device)
    hwcfg["mode"] = "unipolar"
    oVecPE = ProgError(oVec, hwcfg).to(device)

    hwcfg["mode"] = "bipolar"

    with torch.no_grad():
        idx = torch.zeros(iVecSource.size()).type(torch.long).to(device)
        start_time = time.time()
        for i in range((2**bitwidth)):
            iBS = iVecBS(idx + i)
            iVecPE.Monitor(iBS)

            oVecU = uBi2Uni(iBS)
            oVecPE.Monitor(oVecU)
        print("--- %s seconds ---" % (time.time() - start_time))
        # torch.min/torch.max reduce in one call; the builtin min/max would
        # iterate over the tensor element by element in Python.
        print("final input error: ", torch.min(iVecPE()[1]),
              torch.max(iVecPE()[1]))
        print("final output error:", torch.min(oVecPE()[1]),
              torch.max(oVecPE()[1]))
        print("final output pp:", oVecPE()[0].data)
        print("final output pe:", oVecPE()[1].data)
        print("final output mean error:", oVecPE()[1].mean())

        result_pe = oVecPE()[1].cpu().numpy()

    # Show the error vector plus the positions and inputs of its extremes.
    print(result_pe)
    print(result_pe.argmin(), result_pe.argmax())
    print(result_pe[result_pe.argmin()], result_pe[result_pe.argmax()])
    print(iVec[result_pe.argmin()], iVec[result_pe.argmax()])
예제 #5
0
def test_fsuconv2d():
    """Compare FSUConv2d against torch.nn.Conv2d for each mode and scaling.

    For each (mode, scale) pair a Conv2d layer gets random quantized weights
    and bias, an FSUConv2d is built from those parameters, and a random
    quantized input is streamed through it for ``2 ** width`` cycles. Input
    and output error ranges and the final RMSE are printed; nothing is
    asserted.
    """
    plot_en = False

    hwcfg_input = {"width": 8, "rng": "Sobol", "dimr": 1}
    hwcfg = {
        "width": 8,
        "mode": "bipolar",
        "scale": None,
        "depth": 20,
        "rng": "Sobol",
        "dimr": 1
    }
    swcfg = {"btype": torch.float, "rtype": torch.float, "stype": torch.float}

    rng = hwcfg["rng"]

    in_channels = 32
    out_channels = 16
    kernel_size = 3
    stride = 2
    padding = 0
    dilation = 1
    groups = 1
    bias = True
    padding_mode = 'zeros'

    modes = ["bipolar", "unipolar"]
    scaled = [True, False]

    for mode in modes:
        for scale in scaled:
            # Scaled runs use kernel volume (+1 when bias is enabled) as the
            # scale; non-scaled runs use 1.
            out_scale = (kernel_size * kernel_size * in_channels +
                         bias) if scale else 1

            hwcfg["mode"] = mode
            hwcfg_input["mode"] = mode
            hwcfg["scale"] = out_scale

            length = 2**hwcfg["width"]
            length_input = 2**hwcfg_input["width"]
            result_pe_cycle = []
            conv2d = torch.nn.Conv2d(in_channels,
                                     out_channels,
                                     kernel_size,
                                     stride=stride,
                                     padding=padding,
                                     dilation=dilation,
                                     groups=groups,
                                     bias=bias,
                                     padding_mode=padding_mode).to(device)

            # Overwrite the parameters with random values quantized to
            # multiples of 1 / length.
            if mode == "unipolar":
                conv2d.weight.data = torch.rand(
                    out_channels, in_channels, kernel_size,
                    kernel_size).mul(length).round().div(length).to(device)
                if bias is True:
                    conv2d.bias.data = torch.rand(out_channels).mul(
                        length).round().div(length).to(device)
            elif mode == "bipolar":
                conv2d.weight.data = torch.rand(
                    out_channels, in_channels, kernel_size, kernel_size).mul(
                        2).sub(1).mul(length).round().div(length).to(device)
                if bias is True:
                    conv2d.bias.data = torch.rand(out_channels).mul(2).sub(
                        1).mul(length).round().div(length).to(device)

            uconv2d = FSUConv2d(in_channels,
                                out_channels,
                                kernel_size,
                                stride=stride,
                                padding=padding,
                                dilation=dilation,
                                groups=groups,
                                bias=bias,
                                padding_mode=padding_mode,
                                weight_ext=conv2d.weight,
                                bias_ext=conv2d.bias,
                                hwcfg=hwcfg,
                                swcfg=swcfg).to(device)

            input_size = (128, 32)
            iVec = (
                (torch.rand(32, in_channels, input_size[0], input_size[1]) *
                 length_input).round() / length_input).to(device)
            oVec = conv2d(iVec)

            # NOTE(review): BinGen receives ``hwcfg`` while RNG receives
            # ``hwcfg_input``; both have width 8 here. Confirm this is intended.
            iVecSource = BinGen(iVec, hwcfg, swcfg)().to(device)
            iVecRNG = RNG(hwcfg_input, swcfg)().to(device)
            iVecBS = BSGen(iVecSource, iVecRNG, swcfg).to(device)

            # The input monitor is built with scale 1; the output monitor
            # gets the layer scale back.
            hwcfg["scale"] = 1
            iVecPE = ProgError(iVec, hwcfg).to(device)

            hwcfg["scale"] = out_scale
            oVecPE = ProgError(oVec, hwcfg).to(device)

            with torch.no_grad():
                idx = torch.zeros(iVecSource.size()).type(
                    torch.long).to(device)
                start_time = time.time()
                for i in range(length):
                    iBS = iVecBS(idx + i)
                    iVecPE.Monitor(iBS)

                    oVecU = uconv2d(iBS)
                    oVecPE.Monitor(oVecU)
                    rmse = torch.sqrt(
                        torch.sum(torch.mul(oVecPE()[1],
                                            oVecPE()[1])) /
                        torch.prod(torch.tensor(oVecPE()[1].size())))
                    if plot_en is True:
                        result_pe_cycle.append(1 - rmse.item())
                print("--- %s seconds ---" % (time.time() - start_time))
                print("RNG: " + rng + ", data: " + mode + ", scaled: " +
                      str(scale))
                print("input error:  ", "min: ",
                      torch.min(iVecPE()[1]).item(), "max: ",
                      torch.max(iVecPE()[1]).item())
                print("output error: ", "min: ",
                      torch.min(oVecPE()[1]).item(), "max: ",
                      torch.max(oVecPE()[1]).item(), "RMSE: ", rmse.item())
                print()
                if plot_en is True:
                    result_pe = oVecPE()[1].cpu().numpy()
                    print("error distribution=========>")
                    plt.figure(figsize=(3, 1.5))
                    plt.hist(
                        result_pe.flatten(),
                        bins='auto')  # arguments are passed to np.histogram
                    plt.show()
                    print("progressive accuracy=========>")
                    plt.figure(figsize=(3, 1.5))
                    plt.plot(result_pe_cycle)  # 1 - RMSE per cycle
                    plt.show()
예제 #6
0
def test_fsudiv():
    """Sweep all dividend/divisor pairs through FSUDiv and print error statistics.

    A 2-D grid of (dividend, divisor) pairs covering the whole value range of
    the configured mode is built. The reference quotient is computed with
    NaN/inf positions zeroed and the result clamped to [-1, 1]. The unary
    divider is then run for ``2 ** width`` cycles. Positions whose reference
    quotient was NaN/inf are excluded from the printed RMSE, MAE, bias, max
    and min.
    """
    hwcfg = {
        "width": 8,
        "mode": "unipolar",
        "rng": "Sobol",
        "dimr": 4,
        "scale": 1,
        "depth_sa": 3,
        "depth_ss": 2,
        "entry_kn": 2
    }
    swcfg = {"rtype": torch.float, "stype": torch.float, "btype": torch.float}

    bitwidth = hwcfg["width"]
    mode = hwcfg["mode"]
    total_cnt = 1

    print("========================================================")
    print(mode)
    print("========================================================")
    if mode == "unipolar":
        # all values in unipolar are non-negative
        # dividend is always non greater than divisor
        # divisor is non-zero
        low_bound = 0
        up_bound = 2**bitwidth
    elif mode == "bipolar":
        # values in bipolar are arbitrarily positive or negative
        # abs of dividend is always non greater than abs of divisor
        # abs of divisor is non-zero
        low_bound = -2**(bitwidth - 1)
        up_bound = 2**(bitwidth - 1)

    # One row per divisor (descending from up_bound); within a row the
    # dividend ascends from low_bound to up_bound.
    dividend_row = list(range(low_bound, up_bound + 1))
    divisor_vals = range(up_bound, low_bound - 1, -1)
    divisor_list = [[val] * len(dividend_row) for val in divisor_vals]
    dividend_list = [list(dividend_row) for _ in divisor_vals]

    dividend = torch.tensor(dividend_list).type(
        torch.float).div(up_bound).to(device)
    divisor = torch.tensor(divisor_list).type(
        torch.float).div(up_bound).to(device)
    quotient = dividend.div(divisor)

    # find the invalid positions in quotient (division by zero)
    quotient_mask = torch.isnan(quotient) | torch.isinf(quotient)
    quotient[quotient_mask] = 0
    quotient = quotient.clamp(-1, 1)

    result_pe_total = []
    for _ in range(total_cnt):
        quotientPE = ProgError(quotient, hwcfg).to(device)

        dividendPE = ProgError(dividend, hwcfg).to(device)
        dividendSRC = BinGen(dividend, hwcfg, swcfg)().to(device)

        divisorPE = ProgError(divisor, hwcfg).to(device)
        divisorSRC = BinGen(divisor, hwcfg, swcfg)().to(device)

        dut_div = FSUDiv(hwcfg, swcfg).to(device)

        # NOTE(review): this mutation persists; with total_cnt > 1 every
        # constructor above would see dimr == 1 from the second iteration on.
        hwcfg["dimr"] = 1
        dividendRNG = RNG(hwcfg, swcfg)().to(device)
        dividendBS = BSGen(dividendSRC, dividendRNG, swcfg).to(device)
        divisorRNG = RNG(hwcfg, swcfg)().to(device)
        divisorBS = BSGen(divisorSRC, divisorRNG, swcfg).to(device)
        with torch.no_grad():
            for i in range(2**bitwidth):
                dividend_bs = dividendBS(torch.tensor([i]))
                dividendPE.Monitor(dividend_bs)

                divisor_bs = divisorBS(torch.tensor([i]))
                divisorPE.Monitor(divisor_bs)

                quotient_bs = dut_div(dividend_bs, divisor_bs)
                quotientPE.Monitor(quotient_bs)

        # collect this run's error, marking invalid positions as NaN
        result_pe = quotientPE()[1].cpu().numpy()
        result_pe[quotient_mask.cpu().numpy()] = np.nan
        result_pe_total.append(result_pe)

    # stack the per-run results
    result_pe_total = np.array(result_pe_total)

    #######################################################################
    # check the error of all simulation
    #######################################################################
    result_pe_total_no_nan = result_pe_total[~np.isnan(result_pe_total)]
    print("RMSE:{:1.4}".format(math.sqrt(np.mean(result_pe_total_no_nan**2))))
    print("MAE: {:1.4}".format(np.mean(np.abs(result_pe_total_no_nan))))
    print("bias:{:1.4}".format(np.mean(result_pe_total_no_nan)))
    print("max: {:1.4}".format(np.max(result_pe_total_no_nan)))
    print("min: {:1.4}".format(np.min(result_pe_total_no_nan)))
예제 #7
0
def test_fsumul():
    """Exercise FSUMul with a static operand in bipolar and unipolar mode.

    For each mode, 100 random quantized values are multiplied element-wise
    by a second random vector handed to ``FSUMul`` at construction. Input
    and output error ranges, RMSE and bias are printed; nothing is asserted.
    """
    hwcfg = {
        "width": 8,
        "mode": "bipolar",
        "dimr": 1,
        "dima": 0,
        "rng": "sobol",
        "scale": 1,
        "depth": 10,
        "entry": None,
        "static": True
    }
    swcfg = {"rtype": torch.float, "stype": torch.float, "btype": torch.float}
    bitwidth = hwcfg["width"]

    col = 100
    modes = ["bipolar", "unipolar"]

    for mode in modes:
        # Random operands quantized to multiples of 1 / 2**bitwidth.
        if mode == "unipolar":
            input_prob = torch.rand(col).mul(2**bitwidth).round().div(
                2**bitwidth).to(device)
            iVec = torch.rand(col).mul(2**bitwidth).round().div(
                2**bitwidth).to(device)
        elif mode == "bipolar":
            input_prob = torch.rand(col).mul(2).sub(1).mul(
                2**bitwidth).round().div(2**bitwidth).to(device)
            iVec = torch.rand(col).mul(2).sub(1).mul(2**bitwidth).round().div(
                2**bitwidth).to(device)

        hwcfg["mode"] = mode

        dut_mul = FSUMul(input_prob, hwcfg, swcfg).to(device)

        # Reference product, quantized like the operands.
        oVec = torch.mul(iVec, input_prob).mul(2**bitwidth).round().div(
            2**bitwidth).to(device)

        iVecPE = ProgError(iVec, hwcfg).to(device)
        oVecPE = ProgError(oVec, hwcfg).to(device)

        iVecSource = BinGen(iVec, hwcfg, swcfg)().to(device)
        iVecRNG = RNG(hwcfg, swcfg)().to(device)
        iVecBS = BSGen(iVecSource, iVecRNG, swcfg).to(device)

        with torch.no_grad():
            start_time = time.time()
            # unary cycle count 2^n for n-bit binary data
            for i in range(2**bitwidth):
                iBS = iVecBS(torch.tensor([i]))  # input bit stream generation
                iVecPE.Monitor(iBS)  # input accuracy measurement
                oVecU = dut_mul(iBS)  # computing kernel, e.g., multiplication
                oVecPE.Monitor(oVecU)  # output accuracy measurement
            print("--- %s seconds ---" % (time.time() - start_time))
            print("input error: ", torch.min(iVecPE()[1]),
                  torch.max(iVecPE()[1]))
            print("output error: ", torch.min(oVecPE()[1]),
                  torch.max(oVecPE()[1]))
            result_pe = oVecPE()[1].cpu().numpy()
            # Array reductions replace the builtin ``sum``, which iterated
            # over the NumPy array element by element in Python.
            print("RMSE", math.sqrt((result_pe**2).mean()))
            print("bias", result_pe.mean())
예제 #8
0
def test_fsusqrt(mode="unipolar",
                 bitwidth=8,
                 emit=True,
                 jk_trace=False,
                 depth_kernel=1,
                 depth_sr=4,
                 savepdf=False,
                 total_cnt=1):
    """Sweep all non-negative inputs through FSUSqrt and print error statistics.

    Args:
        mode: "unipolar" or "bipolar"; selects the upper bound of the sweep.
        bitwidth: stored as ``hwcfg["width"]``; the run lasts
            ``2 ** bitwidth`` cycles.
        emit: stored as ``hwcfg["emit"]``.
        jk_trace: stored as ``hwcfg["jk_trace"]``.
        depth_kernel: stored as ``hwcfg["entry_kn"]``.
        depth_sr: stored as ``hwcfg["entry_sr"]``.
        savepdf: unused by this function.
        total_cnt: number of repeated simulation runs to aggregate.

    Raises:
        ValueError: if ``mode`` is neither "unipolar" nor "bipolar".
    """
    hwcfg = {
        "width": bitwidth,
        "mode": mode,
        "dima": 0,
        "scale": 1,
        "depth": 10,
        "entry": None,
        "jk_trace": jk_trace,
        "emit": emit,
        "entry_kn": depth_kernel,
        "entry_sr": depth_sr,
        "rng": "Sobol",
        "dimr": 4
    }
    swcfg = {"rtype": torch.float, "stype": torch.float, "btype": torch.float}

    print("========================================================")
    print(mode)
    print("========================================================")
    # all input values are non-negative
    low_bound = 0
    if mode == "unipolar":
        up_bound = 2**bitwidth
    elif mode == "bipolar":
        up_bound = 2**(bitwidth - 1)
    else:
        # Previously an unknown mode left up_bound unbound and failed below
        # with an UnboundLocalError.
        raise ValueError("unsupported mode: {!r}".format(mode))

    input_list = list(range(low_bound, up_bound + 1))

    # ``in_vec``/``ref_out`` rather than ``input``/``output`` so the builtin
    # ``input`` is not shadowed.
    in_vec = torch.tensor(input_list).type(torch.float).div(up_bound).to(device)

    ref_out = torch.sqrt(in_vec).to(device)

    result_pe_total = []
    for _ in range(total_cnt):
        outputPE = ProgError(ref_out, hwcfg).to(device)

        inputPE = ProgError(in_vec, hwcfg).to(device)
        inputSRC = BinGen(in_vec, hwcfg, swcfg)().to(device)

        dut_sqrt = FSUSqrt(hwcfg, swcfg).to(device)

        inputRNG = RNG(hwcfg, swcfg)().to(device)
        inputBS = BSGen(inputSRC, inputRNG, swcfg).to(device)
        with torch.no_grad():
            for i in range(2**bitwidth):
                input_bs = inputBS(torch.tensor([i]))
                inputPE.Monitor(input_bs)

                output_bs = dut_sqrt(input_bs)
                outputPE.Monitor(output_bs)

        # collect this run's output error
        result_pe = outputPE()[1].cpu().numpy()
        result_pe_total.append(result_pe)

    # stack the per-run results
    result_pe_total = np.array(result_pe_total)

    #######################################################################
    # check the error of all simulation
    #######################################################################
    print("RMSE:{:1.4}".format(math.sqrt(np.mean(result_pe_total**2))))
    print("MAE: {:1.4}".format(np.mean(np.abs(result_pe_total))))
    print("bias:{:1.4}".format(np.mean(result_pe_total)))
    print("max: {:1.4}".format(np.max(result_pe_total)))
    print("min: {:1.4}".format(np.min(result_pe_total)))
def test_fsumul_in_stream():
    """Exercise FSUMul with two streamed operands in bipolar and unipolar mode.

    For each mode two random quantized vectors of 100 elements are turned
    into bitstreams and multiplied by a non-static ``FSUMul`` over
    ``2 ** 12`` cycles. Error ranges of both inputs and the error range,
    RMSE and bias of the output are printed; nothing is asserted.
    """
    bitwidth = 12
    depth = 4
    swcfg = {"rtype": torch.float, "stype": torch.float, "btype": torch.float}
    hwcfg = {
        "width": bitwidth,
        "mode": "bipolar",
        "dimr": 1,
        "dima": 0,
        "rng": "sobol",
        "scale": 1,
        "depth": 10,
        "entry": None,
        "static": False
    }

    col = 100
    levels = 2**bitwidth

    for mode in ["bipolar", "unipolar"]:
        # Draw both operands in the same order for either mode; bipolar data
        # is mapped from [0, 1) to [-1, 1) before quantization.
        if mode == "bipolar":
            raw_0 = torch.rand(col).mul(2).sub(1)
            input_prob_0 = raw_0.mul(levels).round().div(levels).to(device)
            raw_1 = torch.rand(col).mul(2).sub(1)
            input_prob_1 = raw_1.mul(levels).round().div(levels).to(device)
        elif mode == "unipolar":
            raw_0 = torch.rand(col)
            input_prob_0 = raw_0.mul(levels).round().div(levels).to(device)
            raw_1 = torch.rand(col)
            input_prob_1 = raw_1.mul(levels).round().div(levels).to(device)

        hwcfg["mode"] = mode

        # The multiplier is built with ``width`` temporarily set to ``depth``.
        hwcfg["width"] = depth
        dut_mul = FSUMul(None, hwcfg, swcfg).to(device)
        hwcfg["width"] = bitwidth

        product = torch.mul(input_prob_0, input_prob_1)
        oVec = product.mul(levels).round().div(levels).to(device)

        prob_0_PE = ProgError(input_prob_0, hwcfg).to(device)
        prob_1_PE = ProgError(input_prob_1, hwcfg).to(device)

        oVecPE = ProgError(oVec, hwcfg).to(device)

        prob_0_Source = BinGen(input_prob_0, hwcfg, swcfg)().to(device)
        prob_1_Source = BinGen(input_prob_1, hwcfg, swcfg)().to(device)

        iVecRNG0 = RNG(hwcfg, swcfg)().to(device)
        iVecRNG1 = RNG(hwcfg, swcfg)().to(device)
        prob_0_BS = BSGen(prob_0_Source, iVecRNG0, swcfg).to(device)
        prob_1_BS = BSGen(prob_1_Source, iVecRNG1, swcfg).to(device)

        with torch.no_grad():
            start_time = time.time()
            base_idx = torch.zeros(input_prob_0.size()).type(torch.long).to(device)
            for cycle in range(levels):
                iBS_0 = prob_0_BS(base_idx + cycle)
                iBS_1 = prob_1_BS(base_idx + cycle)

                prob_0_PE.Monitor(iBS_0)
                prob_1_PE.Monitor(iBS_1)

                oVecPE.Monitor(dut_mul(iBS_0, iBS_1))
            elapsed = time.time() - start_time
            print("--- %s seconds ---" % elapsed)
            print(mode)

            in0_min = torch.min(prob_0_PE()[1])
            in0_max = torch.max(prob_0_PE()[1])
            print("input 0 error: ", "min:", in0_min, "max:", in0_max)

            in1_min = torch.min(prob_1_PE()[1])
            in1_max = torch.max(prob_1_PE()[1])
            print("input 1 error: ", "min:", in1_min, "max:", in1_max)

            out_min = torch.min(oVecPE()[1])
            out_max = torch.max(oVecPE()[1])
            out_rmse = torch.sqrt(
                torch.mean(torch.mul(oVecPE()[1], oVecPE()[1])))
            out_bias = torch.mean(oVecPE()[1])
            print("output error: ", "min:", out_min, "max:", out_max,
                  "rmse:", out_rmse, "bias:", out_bias)
예제 #10
0
def test_fsusignabs():
    """Sweep the full input range through FSUSignAbs and print error statistics.

    With the configured bipolar mode the inputs cover [-1, 1] in steps of
    1 / 2**(width - 1). The second output of the unit is compared against
    ``torch.abs`` of the input over 5 repeated runs of ``2 ** width`` cycles
    each. RMSE, MAE, bias, max and min of the error are printed.
    """
    total_cnt = 5
    hwcfg = {
        "width": 8,
        "mode": "bipolar",
        "dimr": 1,
        "rng": "sobol",
        "scale": 1,
        "depth": 5
    }
    swcfg = {"rtype": torch.float, "stype": torch.float, "btype": torch.float}
    bitwidth = hwcfg["width"]
    mode = hwcfg["mode"]

    print("========================================================")
    print(mode)
    print("========================================================")
    # unipolar inputs start at zero; bipolar inputs also cover negatives
    low_bound = 0
    if mode == "unipolar":
        up_bound = 2**bitwidth
    elif mode == "bipolar":
        low_bound = -2**(bitwidth - 1)
        up_bound = 2**(bitwidth - 1)

    input_list = list(range(low_bound, up_bound + 1))

    # ``in_vec``/``ref_out`` rather than ``input``/``output`` so the builtin
    # ``input`` is not shadowed.
    in_vec = torch.tensor(input_list).type(torch.float).div(up_bound).to(device)

    ref_out = torch.abs(in_vec).to(device)

    result_pe_total = []
    for _ in range(total_cnt):
        outputPE = ProgError(ref_out, hwcfg).to(device)

        inputPE = ProgError(in_vec, hwcfg).to(device)
        inputSRC = BinGen(in_vec, hwcfg, swcfg)().to(device)

        dut = FSUSignAbs(hwcfg, swcfg).to(device)

        inputRNG = RNG(hwcfg, swcfg)().to(device)
        inputBS = BSGen(inputSRC, inputRNG, swcfg).to(device)
        with torch.no_grad():
            for i in range(2**bitwidth):
                input_bs = inputBS(torch.tensor([i]))
                inputPE.Monitor(input_bs)

                # Only the second output is monitored; the first is discarded.
                _, output_bs = dut(input_bs)
                outputPE.Monitor(output_bs)

        # collect this run's output error
        result_pe = outputPE()[1].cpu().numpy()
        result_pe_total.append(result_pe)

    # stack the per-run results
    result_pe_total = np.array(result_pe_total)

    #######################################################################
    # check the error of all simulation
    #######################################################################
    print("RMSE:{:1.4}".format(np.sqrt(np.mean(result_pe_total**2))))
    print("MAE: {:1.4}".format(np.mean(np.abs(result_pe_total))))
    print("bias:{:1.4}".format(np.mean(result_pe_total)))
    print("max: {:1.4}".format(np.max(result_pe_total)))
    print("min: {:1.4}".format(np.min(result_pe_total)))