def forward(self, input: Tensor, hx: "Tensor | None" = None) -> Tensor:
    """Simulate one MGU step in the unary domain and return the new hidden state.

    A fresh ``FSUMGUCell`` is built on every call from this module's external
    weights/biases, with ``hx`` as its hidden-state buffer. ``input`` and
    ``hx`` are converted to bit streams, the cell is driven for
    ``2**self.hwcfg["width"]`` cycles, and every output bit is fed to a
    ``ProgError`` monitor whose first return value is used as the result.

    Args:
        input: Input tensor; its first dimension is used as the batch size.
        hx: Previous hidden state. When ``None`` (the default), a zero tensor
            of shape ``(input.size()[0], self.hidden_size)`` with the dtype
            and device of ``input`` is used.

    Returns:
        The first element of the tuple returned by the output ``ProgError``
        monitor after the last cycle.
    """
    dev = input.device
    batch = input.size()[0]

    if hx is None:
        hx = torch.zeros(batch, self.hidden_size, dtype=input.dtype, device=dev)

    rnncell = FSUMGUCell(self.input_size,
                         self.hidden_size,
                         bias=self.bias,
                         weight_ext_f=self.weight_f,
                         bias_ext_f=self.bias_f,
                         weight_ext_n=self.weight_n,
                         bias_ext_n=self.bias_n,
                         hx_buffer=hx,
                         hwcfg=self.hwcfg,
                         swcfg=self.swcfg).to(dev)

    # Bit stream generator for the input.
    iSource = BinGen(input, self.hwcfg, self.swcfg)().to(dev)
    iRNG = RNG(self.hwcfg, self.swcfg)().to(dev)
    iBSG = BSGen(iSource, iRNG, self.swcfg).to(dev)

    # Bit stream generator for the hidden state.
    hSource = BinGen(hx, self.hwcfg, self.swcfg)().to(dev)
    hRNG = RNG(self.hwcfg, self.swcfg)().to(dev)
    hBSG = BSGen(hSource, hRNG, self.swcfg).to(dev)

    # Output monitor; it is constructed around a zero tensor of the output shape.
    oPE = ProgError(
        torch.zeros(batch, self.hidden_size, dtype=input.dtype, device=dev),
        self.hwcfg_ope).to(dev)

    # The zero index tensors do not depend on the cycle, so build them once;
    # ``idx + c`` below allocates a new tensor and never mutates them.
    idx = torch.zeros(iSource.size(), dtype=torch.long, device=dev)
    hdx = torch.zeros(hSource.size(), dtype=torch.long, device=dev)

    for c in range(2**self.hwcfg["width"]):
        iBS = iBSG(idx + c)
        hBS = hBSG(hdx + c)
        oBS = rnncell(iBS, hBS)
        oPE.Monitor(oBS)

    hy = oPE()[0]
    return hy
def _fsumgu_print_abs_error(label, err):
    """Print range, std, mean and RMSE of the error tensor ``err`` on one line."""
    err_min = err.min().item()
    err_max = err.max().item()
    rmse = torch.sqrt(torch.mean(torch.square(err)))
    std, mean = torch.std_mean(err)
    print("{:30s}".format(label) +
          ", Absolute Error range," + "{:12f}".format(err_min) + ", {:12f}".format(err_max) +
          ", std," + "{:12f}".format(std) +
          ", mean," + "{:12f}".format(mean) +
          ", rmse," + "{:12f}".format(rmse))


def test_fsumgu():
    """Compare the unary (FSU), fixed-point and HUB MGU cells against HardMGUCell.

    For each bit width in [7, 8, 9, 10], a random input window is run through
    four cells that share the weights of ``rnn1``:

    * ``rnn1`` - ``HardMGUCell`` reference,
    * ``rnn2`` - ``FSUMGUCell``, rebuilt for every window step and driven for
      ``2**bitwidth`` cycles with generated bit streams,
    * ``rnn3`` - ``HardMGUCellFXP``,
    * ``rnn4`` - ``HUBMGUCell``.

    Per window step the FSU output error is reported through
    ``progerror_report`` and the HUB / fixed-point errors relative to the
    reference are printed.
    """
    win_sz = 10
    batch = 32
    input_sz = 256
    hidden_sz = 64
    intwidth = 1
    mode = "bipolar"
    rng = "Sobol"
    bias = False
    # When True, only the FSU output error is reported; the input, hidden and
    # per-gate monitors are still updated but not printed.
    output_error_only = True

    # Intermediate signals exposed as attributes by both rnn1 and rnn2.
    gate_names = ("fg_ug_in", "fg_in", "fg", "fg_hx",
                  "ng_ug_in", "ng", "fg_ng", "fg_ng_inv")

    for bitwidth in [7, 8, 9, 10]:
        print("bit width:", bitwidth)

        fracwidth = bitwidth - intwidth
        hwcfg = {
            "width": bitwidth,
            "mode": mode,
            "depth": bitwidth + 2,
            "depth_ismul": bitwidth - 4,
            "rng": rng,
            "dimr": 1,
            "scale": 1
        }
        swcfg = {
            "btype": torch.float,
            "rtype": torch.float,
            "stype": torch.float
        }

        inputs = torch.randn(win_sz, batch, input_sz).to(device)
        inputs = truncated_normal(inputs, mean=0, std=0.4)

        hx1 = torch.randn(batch, hidden_sz).to(device)
        hx1 = truncated_normal(hx1, mean=0, std=0.1)
        hx2 = hx1.clone().detach().to(device)
        hx3 = hx1.clone().detach().to(device)
        hx4 = hx1.clone().detach().to(device)

        output1 = []
        output2 = []
        output3 = []
        output4 = []

        rnn1 = HardMGUCell(input_sz, hidden_sz, bias=bias, hard=True).to(device)

        rnn3 = HardMGUCellFXP(input_sz, hidden_sz, bias=bias, hard=True,
                              intwidth=intwidth, fracwidth=fracwidth).to(device)
        rnn3.weight_f.data = rnn1.weight_f.clone().detach().to(device)
        rnn3.weight_n.data = rnn1.weight_n.clone().detach().to(device)

        rnn4 = HUBMGUCell(input_sz, hidden_sz, bias=bias,
                          weight_ext_f=rnn1.weight_f, bias_ext_f=rnn1.bias_f,
                          weight_ext_n=rnn1.weight_n, bias_ext_n=rnn1.bias_n,
                          hwcfg=hwcfg).to(device)

        for i in range(win_sz):
            hx1 = rnn1(inputs[i], hx1)
            output1.append(hx1)

            hx3 = rnn3(inputs[i], hx3)
            output3.append(hx3)

            hx4 = rnn4(inputs[i], hx4)
            output4.append(hx4)

            iVec, hVec = inputs[i], hx2

            # rnn2 is rebuilt inside the loop to mimic the hw reset
            rnn2 = FSUMGUCell(input_sz, hidden_sz, bias=bias,
                              weight_ext_f=rnn1.weight_f, bias_ext_f=rnn1.bias_f,
                              weight_ext_n=rnn1.weight_n, bias_ext_n=rnn1.bias_n,
                              hx_buffer=hx2, hwcfg=hwcfg, swcfg=swcfg).to(device)

            iSource = BinGen(iVec, hwcfg, swcfg)().to(device)
            iRNG = RNG(hwcfg, swcfg)().to(device)
            iBSG = BSGen(iSource, iRNG, swcfg).to(device)
            iPE = ProgError(iVec, hwcfg).to(device)

            hSource = BinGen(hVec, hwcfg, swcfg)().to(device)
            hRNG = RNG(hwcfg, swcfg)().to(device)
            hBSG = BSGen(hSource, hRNG, swcfg).to(device)
            hPE = ProgError(hVec, hwcfg).to(device)

            # The reference output of this window step is the monitor target.
            oPE = ProgError(output1[i], hwcfg).to(device)

            # One monitor per intermediate signal, targeting rnn1's value.
            gate_pe = {
                name: ProgError(getattr(rnn1, name), hwcfg).to(device)
                for name in gate_names
            }

            # Cycle-independent zero indices; ``idx + c`` creates a new tensor.
            idx = torch.zeros(iSource.size(), dtype=torch.long, device=device)
            hdx = torch.zeros(hSource.size(), dtype=torch.long, device=device)

            for c in range(2**bitwidth):
                iBS = iBSG(idx + c)
                iPE.Monitor(iBS)

                hBS = hBSG(hdx + c)
                hPE.Monitor(hBS)

                oBS = rnn2(iBS, hBS)

                for name, pe in gate_pe.items():
                    pe.Monitor(getattr(rnn2, name))

                oPE.Monitor(oBS)

            # The FSU hidden state for the next window step is the monitor's
            # first return value after all cycles.
            hx2 = oPE()[0]
            output2.append(hx2)

            if not output_error_only:
                progerror_report(iPE, "input")
                progerror_report(hPE, "hidden")
                for name, pe in gate_pe.items():
                    progerror_report(pe, name)

            progerror_report(oPE, str(i) + "-th win output fsu")

            _fsumgu_print_abs_error(str(i) + "-th win output hub", hx1 - hx4)
            _fsumgu_print_abs_error(str(i) + "-th win output fxp", hx1 - hx3)

        print()
def test_fsuadd():
    """Measure the accuracy of ``FSUAdd`` for bipolar/unipolar, scaled/unscaled data.

    For every (mode, scaled) combination a random quantized input of shape
    ``size`` is summed along ``hwcfg["dima"]``, streamed through ``FSUAdd``
    for ``2**width`` cycles, and the input/output errors tracked by
    ``ProgError`` are printed. The printed time is the accumulated time
    spent inside the ``FSUAdd`` calls only.
    """
    hwcfg = {
        "width": 12,
        "mode": "bipolar",
        "dimr": 1,
        "dima": 0,
        "rng": "sobol",
        "scale": 1,
        "depth": 20,
        "entry": None
    }
    swcfg = {"rtype": torch.float, "stype": torch.float, "btype": torch.float}

    bitwidth = hwcfg["width"]
    rng = hwcfg["rng"]
    levels = 2**bitwidth

    plot_en = False
    modes = ["bipolar", "unipolar"]
    size = [128, 256, 512]
    scaled = [True, False]

    for mode in modes:
        for scale in scaled:
            run_time = 0
            acc_dim = hwcfg["dima"]
            scale_mod = size[acc_dim]
            out_scale = scale_mod if scale else 1
            result_pe_cycle = []

            hwcfg["mode"] = mode
            hwcfg["scale"] = out_scale
            uadd = FSUAdd(hwcfg, swcfg).to(device)

            if mode == "unipolar":
                iVec = torch.rand(size).mul(levels).round().div(levels).to(device)
            elif mode == "bipolar":
                iVec = torch.rand(size).mul(2).sub(1).mul(levels).round().div(levels).to(device)

            oVec = torch.sum(iVec, acc_dim).to(device)

            iVecSource = BinGen(iVec, hwcfg, swcfg)().to(device)
            iVecRNG = RNG(hwcfg, swcfg)().to(device)
            iVecBS = BSGen(iVecSource, iVecRNG, swcfg).to(device)

            # The input monitor is built with scale 1, the output monitor
            # with the adder's scale.
            hwcfg["scale"] = 1
            iVecPE = ProgError(iVec, hwcfg).to(device)
            print("iVecPE cfg", iVecPE.hwcfg)

            hwcfg["scale"] = out_scale
            oVecPE = ProgError(oVec, hwcfg).to(device)
            print("oVecPE cfg", oVecPE.hwcfg)

            with torch.no_grad():
                idx = torch.zeros(iVecSource.size()).type(torch.long).to(device)
                for i in range(levels):
                    iBS = iVecBS(idx + i)
                    iVecPE.Monitor(iBS)

                    start_time = time.time()
                    oVecU = uadd(iBS)
                    run_time += time.time() - start_time

                    if i == 0:
                        print("uadd cfg", uadd.hwcfg)

                    oVecPE.Monitor(oVecU)
                    out_err = oVecPE()[1]
                    rmse = torch.sqrt(torch.mean(torch.mul(out_err, out_err)))
                    result_pe_cycle.append(1 - rmse.item())

                # Report the time accumulated over all cycles; measuring from
                # ``start_time`` here would cover only the final cycle.
                print("--- %s seconds ---" % run_time)
                print("RNG: " + rng + ", data: " + mode + ", scaled: " + str(scale))
                print("input error:  ", "min: ", torch.min(iVecPE()[1]).item(),
                      "max: ", torch.max(iVecPE()[1]).item())
                print("output error: ", "min: ", torch.min(oVecPE()[1]).item(),
                      "max: ", torch.max(oVecPE()[1]).item(),
                      "RMSE: ", rmse.item())
                print()

                if plot_en:
                    result_pe = oVecPE()[1].cpu().numpy()
                    print("error distribution=========>")
                    plt.figure(figsize=(3, 1.5))
                    # arguments are passed to np.histogram
                    plt.hist(result_pe.flatten(), bins='auto')
                    plt.show()
                    print("progressive accuracy=========>")
                    plt.figure(figsize=(3, 1.5))
                    # one point (1 - RMSE) per simulated cycle
                    plt.plot(result_pe_cycle)
                    plt.show()
def test_bi2uni():
    """Stream a random input through ``Bi2Uni`` and print its error statistics.

    The input is generated as a bipolar bit stream; the input monitor is
    built with mode "bipolar" and the output monitor with mode "unipolar".
    After ``2**width`` cycles the final errors, and the inputs at the
    smallest/largest output error, are printed.
    """
    hwcfg = {
        "width": 8,
        "mode": "bipolar",
        "dimr": 1,
        "rng": "sobol",
        "scale": 1,
        "depth": 3
    }
    swcfg = {"rtype": torch.float, "stype": torch.float, "btype": torch.float}

    bitwidth = hwcfg["width"]
    levels = 2**bitwidth
    in_dim = 1024

    uBi2Uni = Bi2Uni(hwcfg, swcfg).to(device)

    iVec = ((torch.rand(in_dim) * levels).round() / levels).to(device)

    start_time = time.time()
    oVec = iVec.type(torch.float)
    # Time of the single reference conversion, multiplied by the cycle count.
    print("--- %s seconds ---" % ((time.time() - start_time) * levels))

    print("input", iVec)
    print("real output", oVec)

    hwcfg["mode"] = "bipolar"
    iVecSource = BinGen(iVec, hwcfg, swcfg)().to(device)
    iVecRNG = RNG(hwcfg, swcfg)().to(device)
    iVecBS = BSGen(iVecSource, iVecRNG, swcfg).to(device)
    iVecPE = ProgError(iVec, hwcfg).to(device)

    # Only the output monitor is constructed with the unipolar mode.
    hwcfg["mode"] = "unipolar"
    oVecPE = ProgError(oVec, hwcfg).to(device)
    hwcfg["mode"] = "bipolar"

    with torch.no_grad():
        idx = torch.zeros(iVecSource.size()).type(torch.long).to(device)
        start_time = time.time()
        for i in range(levels):
            iBS = iVecBS(idx + i)
            iVecPE.Monitor(iBS)

            oVecU = uBi2Uni(iBS)
            oVecPE.Monitor(oVecU)
        print("--- %s seconds ---" % (time.time() - start_time))

        # torch.min/torch.max reduce on the tensor directly instead of
        # iterating element by element as the builtins would.
        print("final input error: ", torch.min(iVecPE()[1]), torch.max(iVecPE()[1]))
        print("final output error:", torch.min(oVecPE()[1]), torch.max(oVecPE()[1]))
        print("final output pp:", oVecPE()[0].data)
        print("final output pe:", oVecPE()[1].data)
        print("final output mean error:", oVecPE()[1].mean())

        result_pe = oVecPE()[1].cpu().numpy()
        worst_low = result_pe.argmin()
        worst_high = result_pe.argmax()
        print(result_pe)
        print(worst_low, worst_high)
        print(result_pe[worst_low], result_pe[worst_high])
        print(iVec[worst_low], iVec[worst_high])
def _fsuconv2d_rand_quantized(shape, bipolar, levels):
    """Return a random tensor of ``shape`` quantized to multiples of 1/levels.

    Values are drawn with ``torch.rand`` (then mapped through ``*2 - 1`` when
    ``bipolar`` is set) and moved to the module-level ``device``.
    """
    sample = torch.rand(*shape)
    if bipolar:
        sample = sample.mul(2).sub(1)
    return sample.mul(levels).round().div(levels).to(device)


def test_fsuconv2d():
    """Compare ``FSUConv2d`` against ``torch.nn.Conv2d`` for all mode/scale settings.

    For every (mode, scaled) combination a Conv2d layer with quantized random
    weights (and bias) provides the reference output; the same weights are
    handed to ``FSUConv2d``, which is driven with an input bit stream for
    ``2**width`` cycles while ``ProgError`` monitors track the errors.
    """
    plot_en = False

    hwcfg_input = {"width": 8, "rng": "Sobol", "dimr": 1}
    hwcfg = {
        "width": 8,
        "mode": "bipolar",
        "scale": None,
        "depth": 20,
        "rng": "Sobol",
        "dimr": 1
    }
    swcfg = {"btype": torch.float, "rtype": torch.float, "stype": torch.float}

    rng = hwcfg["rng"]

    in_channels = 32
    out_channels = 16
    kernel_size = 3
    stride = 2
    padding = 0
    dilation = 1
    groups = 1
    bias = True
    padding_mode = 'zeros'

    modes = ["bipolar", "unipolar"]
    scaled = [True, False]

    for mode in modes:
        for scale in scaled:
            # Number of accumulated products per output (bias adds one).
            out_scale = (kernel_size * kernel_size * in_channels + bias) if scale else 1

            hwcfg["mode"] = mode
            hwcfg_input["mode"] = mode
            hwcfg["scale"] = out_scale

            length = 2**hwcfg["width"]
            length_input = 2**hwcfg_input["width"]
            result_pe_cycle = []

            conv2d = torch.nn.Conv2d(in_channels, out_channels, kernel_size,
                                     stride=stride, padding=padding,
                                     dilation=dilation, groups=groups,
                                     bias=bias,
                                     padding_mode=padding_mode).to(device)

            if mode in ("unipolar", "bipolar"):
                bipolar = mode == "bipolar"
                conv2d.weight.data = _fsuconv2d_rand_quantized(
                    (out_channels, in_channels, kernel_size, kernel_size),
                    bipolar, length)
                if bias:
                    conv2d.bias.data = _fsuconv2d_rand_quantized(
                        (out_channels,), bipolar, length)

            uconv2d = FSUConv2d(in_channels, out_channels, kernel_size,
                                stride=stride, padding=padding,
                                dilation=dilation, groups=groups, bias=bias,
                                padding_mode=padding_mode,
                                weight_ext=conv2d.weight,
                                bias_ext=conv2d.bias,
                                hwcfg=hwcfg, swcfg=swcfg).to(device)

            input_size = (128, 32)
            iVec = ((torch.rand(32, in_channels, input_size[0], input_size[1])
                     * length_input).round() / length_input).to(device)
            oVec = conv2d(iVec)

            iVecSource = BinGen(iVec, hwcfg, swcfg)().to(device)
            iVecRNG = RNG(hwcfg_input, swcfg)().to(device)
            iVecBS = BSGen(iVecSource, iVecRNG, swcfg).to(device)

            # The input monitor is built with scale 1, the output monitor
            # with the layer's scale.
            hwcfg["scale"] = 1
            iVecPE = ProgError(iVec, hwcfg).to(device)

            hwcfg["scale"] = out_scale
            oVecPE = ProgError(oVec, hwcfg).to(device)

            with torch.no_grad():
                idx = torch.zeros(iVecSource.size()).type(torch.long).to(device)
                start_time = time.time()
                for i in range(length):
                    iBS = iVecBS(idx + i)
                    iVecPE.Monitor(iBS)

                    oVecU = uconv2d(iBS)
                    oVecPE.Monitor(oVecU)

                    out_err = oVecPE()[1]
                    rmse = torch.sqrt(
                        torch.sum(torch.mul(out_err, out_err)) / out_err.numel())
                    if plot_en:
                        result_pe_cycle.append(1 - rmse.item())

                print("--- %s seconds ---" % (time.time() - start_time))
                print("RNG: " + rng + ", data: " + mode + ", scaled: " + str(scale))
                print("input error:  ", "min: ", torch.min(iVecPE()[1]).item(),
                      "max: ", torch.max(iVecPE()[1]).item())
                print("output error: ", "min: ", torch.min(oVecPE()[1]).item(),
                      "max: ", torch.max(oVecPE()[1]).item(),
                      "RMSE: ", rmse.item())
                print()

                if plot_en:
                    result_pe = oVecPE()[1].cpu().numpy()
                    print("error distribution=========>")
                    plt.figure(figsize=(3, 1.5))
                    # arguments are passed to np.histogram
                    plt.hist(result_pe.flatten(), bins='auto')
                    plt.show()
                    print("progressive accuracy=========>")
                    plt.figure(figsize=(3, 1.5))
                    # one point (1 - RMSE) per simulated cycle
                    plt.plot(result_pe_cycle)
                    plt.show()
def test_fsudiv():
    """Exhaustively evaluate ``FSUDiv`` over a grid of dividend/divisor pairs.

    Every integer dividend in [low_bound, up_bound] is paired with every
    integer divisor in the same range (both divided by ``up_bound``). The
    reference quotient is clamped to [-1, 1]; positions where the division
    yields NaN or inf are zeroed for the monitor and excluded (set to NaN)
    from the printed statistics.
    """
    hwcfg = {
        "width": 8,
        "mode": "unipolar",
        "rng": "Sobol",
        "dimr": 4,
        "scale": 1,
        "depth_sa": 3,
        "depth_ss": 2,
        "entry_kn": 2
    }
    swcfg = {"rtype": torch.float, "stype": torch.float, "btype": torch.float}

    bitwidth = hwcfg["width"]
    mode = hwcfg["mode"]
    total_cnt = 1

    # The operand RNGs use dimr 1. A separate config is used so ``hwcfg``
    # keeps its own "dimr" for the monitors, BinGen and FSUDiv on every
    # repetition instead of being overwritten after the first one.
    hwcfg_rng = dict(hwcfg, dimr=1)

    print("========================================================")
    print(mode)
    print("========================================================")
    if mode == "unipolar":
        # all values in unipolar are non-negative
        # dividend is never greater than divisor
        # divisor is non-zero
        low_bound = 0
        up_bound = 2**bitwidth
    elif mode == "bipolar":
        # values in bipolar are arbitrarily positive or negative
        # abs of dividend is never greater than abs of divisor
        # abs of divisor is non-zero
        low_bound = -2**(bitwidth - 1)
        up_bound = 2**(bitwidth - 1)

    # Row r holds divisor (up_bound - r) against every dividend value.
    dividend_row = list(range(low_bound, up_bound + 1))
    divisor_vals = range(up_bound, low_bound - 1, -1)
    dividend_list = [dividend_row for _ in divisor_vals]
    divisor_list = [[val] * len(dividend_row) for val in divisor_vals]

    dividend = torch.tensor(dividend_list).type(torch.float).div(up_bound).to(device)
    divisor = torch.tensor(divisor_list).type(torch.float).div(up_bound).to(device)

    quotient = dividend.div(divisor)

    # find the invalid positions in quotient
    quotient_mask = torch.isnan(quotient) | torch.isinf(quotient)
    quotient[quotient_mask] = 0
    quotient = quotient.clamp(-1, 1)
    invalid_positions = quotient_mask.cpu().numpy()

    result_pe_total = []
    for _ in range(total_cnt):
        quotientPE = ProgError(quotient, hwcfg).to(device)

        dividendPE = ProgError(dividend, hwcfg).to(device)
        dividendSRC = BinGen(dividend, hwcfg, swcfg)().to(device)

        divisorPE = ProgError(divisor, hwcfg).to(device)
        divisorSRC = BinGen(divisor, hwcfg, swcfg)().to(device)

        dut_div = FSUDiv(hwcfg, swcfg).to(device)

        dividendRNG = RNG(hwcfg_rng, swcfg)().to(device)
        dividendBS = BSGen(dividendSRC, dividendRNG, swcfg).to(device)
        divisorRNG = RNG(hwcfg_rng, swcfg)().to(device)
        divisorBS = BSGen(divisorSRC, divisorRNG, swcfg).to(device)

        with torch.no_grad():
            for i in range(2**bitwidth):
                dividend_bs = dividendBS(torch.tensor([i]))
                dividendPE.Monitor(dividend_bs)

                divisor_bs = divisorBS(torch.tensor([i]))
                divisorPE.Monitor(divisor_bs)

                quotient_bs = dut_div(dividend_bs, divisor_bs)
                quotientPE.Monitor(quotient_bs)

        # collect the error of this repetition, ignoring invalid positions
        result_pe = quotientPE()[1].cpu().numpy()
        result_pe[invalid_positions] = np.nan
        result_pe_total.append(result_pe)

    result_pe_total = np.array(result_pe_total)

    #######################################################################
    # check the error of all simulation
    #######################################################################
    result_pe_total_no_nan = result_pe_total[~np.isnan(result_pe_total)]
    print("RMSE:{:1.4}".format(math.sqrt(np.mean(result_pe_total_no_nan**2))))
    print("MAE: {:1.4}".format(np.mean(np.abs(result_pe_total_no_nan))))
    print("bias:{:1.4}".format(np.mean(result_pe_total_no_nan)))
    print("max: {:1.4}".format(np.max(result_pe_total_no_nan)))
    print("min: {:1.4}".format(np.min(result_pe_total_no_nan)))
def test_fsumul():
    """Check the static ``FSUMul`` (one operand fixed at construction) in both modes.

    For each mode, two random quantized vectors are drawn: ``input_prob`` is
    given to ``FSUMul`` at construction, ``iVec`` is streamed in as a bit
    stream for ``2**width`` cycles. Input/output errors from the ``ProgError``
    monitors, plus RMSE and bias of the output error, are printed.
    """
    hwcfg = {
        "width": 8,
        "mode": "bipolar",
        "dimr": 1,
        "dima": 0,
        "rng": "sobol",
        "scale": 1,
        "depth": 10,
        "entry": None,
        "static": True
    }
    swcfg = {"rtype": torch.float, "stype": torch.float, "btype": torch.float}

    bitwidth = hwcfg["width"]
    levels = 2**bitwidth
    col = 100

    def draw_unipolar():
        # uniform in [0, 1], rounded to a multiple of 1/levels
        return torch.rand(col).mul(levels).round().div(levels).to(device)

    def draw_bipolar():
        # uniform in [-1, 1], rounded to a multiple of 1/levels
        return torch.rand(col).mul(2).sub(1).mul(levels).round().div(levels).to(device)

    for mode in ["bipolar", "unipolar"]:
        if mode == "unipolar":
            input_prob = draw_unipolar()
            iVec = draw_unipolar()
        elif mode == "bipolar":
            input_prob = draw_bipolar()
            iVec = draw_bipolar()

        hwcfg["mode"] = mode

        dut_mul = FSUMul(input_prob, hwcfg, swcfg).to(device)

        # expected product, rounded to the same resolution as the operands
        product = torch.mul(iVec, input_prob)
        oVec = product.mul(levels).round().div(levels).to(device)

        iVecPE = ProgError(iVec, hwcfg).to(device)
        oVecPE = ProgError(oVec, hwcfg).to(device)

        iVecSource = BinGen(iVec, hwcfg, swcfg)().to(device)
        iVecRNG = RNG(hwcfg, swcfg)().to(device)
        iVecBS = BSGen(iVecSource, iVecRNG, swcfg).to(device)

        with torch.no_grad():
            start_time = time.time()
            # one unary cycle per value of an n-bit binary number
            for cycle in range(levels):
                cycle_idx = torch.tensor([cycle])
                iBS = iVecBS(cycle_idx)
                iVecPE.Monitor(iBS)

                oVecU = dut_mul(iBS)
                oVecPE.Monitor(oVecU)
            print("--- %s seconds ---" % (time.time() - start_time))
            print("input error: ", torch.min(iVecPE()[1]), torch.max(iVecPE()[1]))
            print("output error: ", torch.min(oVecPE()[1]), torch.max(oVecPE()[1]))
            result_pe = oVecPE()[1].cpu().numpy()
            sample_cnt = len(result_pe)
            print("RMSE", math.sqrt(sum(result_pe**2) / sample_cnt))
            print("bias", sum(result_pe) / sample_cnt)
def test_fsusqrt(mode="unipolar",
                 bitwidth=8,
                 emit=True,
                 jk_trace=False,
                 depth_kernel=1,
                 depth_sr=4,
                 savepdf=False,
                 total_cnt=1):
    """Sweep ``FSUSqrt`` over all non-negative inputs and print error statistics.

    Inputs are the integers 0..up_bound divided by ``up_bound`` (with
    ``up_bound`` depending on ``mode``); the reference is ``torch.sqrt``.
    The simulation is repeated ``total_cnt`` times and RMSE, MAE, bias, max
    and min of the collected output errors are printed. ``savepdf`` is
    accepted but not used by this function.
    """
    hwcfg = {
        "width": bitwidth,
        "mode": mode,
        "dima": 0,
        "scale": 1,
        "depth": 10,
        "entry": None,
        "jk_trace": jk_trace,
        "emit": emit,
        "entry_kn": depth_kernel,
        "entry_sr": depth_sr,
        "rng": "Sobol",
        "dimr": 4
    }
    swcfg = {"rtype": torch.float, "stype": torch.float, "btype": torch.float}
    cycle_cnt = 2**bitwidth

    banner = "========================================================"
    print(banner)
    print(mode)
    print(banner)

    # Square root is only exercised on non-negative values, so the sweep
    # starts at zero in both modes; only the upper bound differs.
    if mode == "unipolar":
        up_bound = 2**bitwidth
    elif mode == "bipolar":
        up_bound = 2**(bitwidth - 1)

    sweep = list(range(0, up_bound + 1))
    radicand = torch.tensor(sweep).type(torch.float).div(up_bound).to(device)
    expected = torch.sqrt(radicand).to(device)

    collected = []
    for _ in range(total_cnt):
        outputPE = ProgError(expected, hwcfg).to(device)

        inputPE = ProgError(radicand, hwcfg).to(device)
        inputSRC = BinGen(radicand, hwcfg, swcfg)().to(device)

        dut_sqrt = FSUSqrt(hwcfg, swcfg).to(device)

        inputRNG = RNG(hwcfg, swcfg)().to(device)
        inputBS = BSGen(inputSRC, inputRNG, swcfg).to(device)

        with torch.no_grad():
            for cycle in range(cycle_cnt):
                in_bit = inputBS(torch.tensor([cycle]))
                inputPE.Monitor(in_bit)

                out_bit = dut_sqrt(in_bit)
                outputPE.Monitor(out_bit)

        # error of this repetition
        collected.append(outputPE()[1].cpu().numpy())

    result_pe_total = np.array(collected)

    #######################################################################
    # error statistics over all repetitions
    #######################################################################
    print("RMSE:{:1.4}".format(math.sqrt(np.mean(result_pe_total**2))))
    print("MAE: {:1.4}".format(np.mean(np.abs(result_pe_total))))
    print("bias:{:1.4}".format(np.mean(result_pe_total)))
    print("max: {:1.4}".format(np.max(result_pe_total)))
    print("min: {:1.4}".format(np.min(result_pe_total)))
def __init__(self,
             in_channels,
             out_channels,
             kernel_size,
             stride=1,
             padding=0,
             dilation=1,
             groups=1,
             bias=True,
             padding_mode='zeros',
             weight_ext=None,
             bias_ext=None,
             hwcfg={
                 "width": 8,
                 "mode": "bipolar",
                 "rng": "Sobol",
                 "dimr": 1
             },
             swcfg={
                 "btype": torch.float,
                 "rtype": torch.float,
                 "stype": torch.float
             }):
    """Set up weight/bias bit stream generators for the unary 2D convolution.

    Args:
        in_channels, out_channels, kernel_size, stride, padding, dilation,
        groups, bias, padding_mode: Forwarded to the parent constructor.
            ``groups`` must be 1 and ``padding_mode`` must be 'zeros'.
        weight_ext: Optional 4-D weight of shape
            (out_channels, in_channels, kH, kW); when given it is passed
            through ``BinGen`` and stored as ``self.weight.data``.
        bias_ext: Optional bias whose first dimension equals
            ``out_channels``; only used when ``bias`` is truthy.
        hwcfg: Hardware config; keys "width", "mode", "rng", "dimr" are read.
        swcfg: Software config; keys "btype", "rtype", "stype" are read.

    Raises:
        AssertionError: On an unsupported mode, ``groups``, ``padding_mode``
            or a shape mismatch of ``weight_ext`` / ``bias_ext``.

    NOTE: the dict defaults are shared between calls; they are only read
    here, never mutated.
    """
    super(FSUConv2dPC, self).__init__(in_channels,
                                      out_channels,
                                      kernel_size,
                                      stride=stride,
                                      padding=padding,
                                      dilation=dilation,
                                      groups=groups,
                                      bias=bias,
                                      padding_mode=padding_mode)

    # Keep private, normalized copies of the configs.
    self.hwcfg = {}
    self.hwcfg["width"] = hwcfg["width"]
    self.hwcfg["mode"] = hwcfg["mode"].lower()
    self.hwcfg["rng"] = hwcfg["rng"].lower()
    self.hwcfg["dimr"] = hwcfg["dimr"]

    self.swcfg = {}
    self.swcfg["btype"] = swcfg["btype"]
    self.swcfg["rtype"] = swcfg["rtype"]
    self.swcfg["stype"] = swcfg["stype"]

    self.mode = hwcfg["mode"].lower()
    assert self.mode in ["unipolar", "bipolar"], \
        "Error: the hw config 'mode' in " + str(self) + " class requires one of ['unipolar', 'bipolar']."

    # Store the raw constructor arguments, overriding whatever the parent
    # constructor stored under the same names.
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.kernel_size = kernel_size
    self.stride = stride
    self.padding = padding
    self.dilation = dilation

    assert groups == 1, \
        "Error: the 'groups' in " + str(self) + " class requires to be 1."
    assert padding_mode == 'zeros', \
        "Error: the 'padding_mode' in " + str(self) + " class requires to be 'zeros'."

    # bias indication for original linear layer
    self.has_bias = bias

    # RNG for weight
    hwcfg_wrng = {
        "width": hwcfg["width"],
        "rng": hwcfg["rng"],
        "dimr": hwcfg["dimr"]
    }
    self.wrng = RNG(hwcfg_wrng, swcfg)()
    self.wtc = hwcfg["rng"].lower() in ["race", "tc", "race10", "tc10"]

    # define the linear weight and bias
    if weight_ext is not None:
        kernel_hw = num2tuple(kernel_size)
        assert (weight_ext.size()[0], weight_ext.size()[1], weight_ext.size()[2], weight_ext.size()[3]) == (out_channels, in_channels, kernel_hw[0], kernel_hw[1]), \
            "Error: the hw config 'out_channels, in_channels, kernel_size' in " + str(self) + " class unmatches the binary weight shape."
        self.weight.data = BinGen(weight_ext, self.hwcfg, self.swcfg)()

    if bias and (bias_ext is not None):
        assert bias_ext.size()[0] == out_channels, \
            "Error: the hw config 'out_channels' in " + str(self) + " class unmatches the binary bias shape."
        self.bias.data = BinGen(bias_ext, self.hwcfg, self.swcfg)()

    if bias:
        # RNG for bias, same as RNG for weight. It is created whenever the
        # layer has a bias (not only when bias_ext is supplied), because the
        # bias bit stream generator below needs it in both cases.
        hwcfg_brng = {
            "width": hwcfg["width"],
            "rng": hwcfg["rng"],
            "dimr": hwcfg["dimr"]
        }
        self.brng = RNG(hwcfg_brng, swcfg)()

    # define the kernel linear for input bit 1; the weight is viewed as
    # (1, out_channels, -1) for the generator and its index tensor
    self.wbsg_i1 = BSGen(self.weight.view(1, self.weight.size()[0], -1), self.wrng, swcfg)
    self.wrdx_i1 = torch.nn.Parameter(
        torch.zeros_like(self.weight, dtype=torch.long),
        requires_grad=False).view(1, self.weight.size()[0], -1)
    if self.has_bias is True:
        self.bbsg = BSGen(self.bias, self.brng, swcfg)
        self.brdx = torch.nn.Parameter(
            torch.zeros_like(self.bias, dtype=torch.long),
            requires_grad=False)

    # if bipolar, define a kernel for input bit 0, note that there is no bias required for this kernel
    if (self.mode == "bipolar") and (self.wtc is False):
        self.wbsg_i0 = BSGen(self.weight.view(1, self.weight.size()[0], -1), self.wrng, swcfg)
        self.wrdx_i0 = torch.nn.Parameter(
            torch.zeros_like(self.weight, dtype=torch.long),
            requires_grad=False).view(1, self.weight.size()[0], -1)

    # indicator of even/odd cycle
    self.even_cycle_flag = torch.nn.Parameter(torch.ones(1, dtype=torch.bool), requires_grad=False)
    # constant padding layers filling with 0 and with 1
    self.padding_0 = torch.nn.ConstantPad2d(self.padding, 0)
    self.padding_1 = torch.nn.ConstantPad2d(self.padding, 1)
    self.bipolar_mode = torch.nn.Parameter(
        torch.tensor([self.mode == "bipolar"], dtype=torch.bool),
        requires_grad=False)
def __init__(self,
             in_1_prob=None,
             hwcfg={
                 "width": 8,
                 "mode": "bipolar",
                 "static": False,
                 "rng": "Sobol",
                 "dimr": 1
             },
             swcfg={
                 "rtype": torch.float,
                 "stype": torch.float
             }):
    """Build the unary multiplier in static or streaming configuration.

    Args:
        in_1_prob: Value of the second operand; required when
            ``hwcfg["static"]`` is True, unused otherwise.
        hwcfg: Hardware config; keys "width", "mode", "static", "rng" and
            "dimr" are read.
        swcfg: Software config; keys "rtype" and "stype" are read.

    Raises:
        AssertionError: If the mode is not 'unipolar'/'bipolar', or if the
            static configuration is requested without ``in_1_prob``.
    """
    super(FSUMul, self).__init__()

    mode = hwcfg["mode"].lower()

    # normalized private copies of both configs
    self.hwcfg = {
        "width": hwcfg["width"],
        "mode": mode,
        "static": hwcfg["static"],
        "rng": hwcfg["rng"].lower(),
        "dimr": hwcfg["dimr"],
    }
    self.swcfg = {
        "rtype": swcfg["rtype"],
        "stype": swcfg["stype"],
    }

    self.entry = 2**hwcfg["width"]
    self.static = hwcfg["static"]
    self.stype = swcfg["stype"]
    self.rtype = swcfg["rtype"]

    self.mode = mode
    assert self.mode in ["unipolar", "bipolar"], \
        "Error: the hw config 'mode' in " + str(self) + " class requires one of ['unipolar', 'bipolar']."

    def zero_index():
        # single-element long index, excluded from gradient computation
        return torch.nn.Parameter(torch.zeros(1).type(torch.long), requires_grad=False)

    is_bipolar = self.mode == "bipolar"

    # random number source shared by the generators below
    self.rng = RNG(hwcfg, swcfg)()

    if self.static is True:
        # static computation: the second operand is known up front
        self.in_1_prob = in_1_prob
        assert in_1_prob is not None, \
            "Error: the static multiplier requires in_1_prob in " + str(self) + " class."
        # fixed bit stream generator for the stored operand
        self.source_gen = BinGen(self.in_1_prob, hwcfg, swcfg)()
        self.bsg = BSGen(self.source_gen, self.rng, {"stype": torch.int8})
        # index into the random sequence for this generator
        self.rng_idx = zero_index()

        # bipolar mode keeps a second generator with its own index
        if is_bipolar:
            self.bsg_inv = BSGen(self.source_gen, self.rng, {"stype": torch.int8})
            self.rng_idx_inv = zero_index()
    else:
        # streaming computation: a shift register with 2**width entries
        # stands in for the stored operand
        self.sr = ShiftReg({"entry": self.entry}, swcfg)
        self.rng_idx = zero_index()
        if is_bipolar:
            self.rng_idx_inv = zero_index()
def test_fsumul_in_stream():
    """Check the non-static ``FSUMul`` where both operands arrive as bit streams.

    The multiplier is constructed with ``hwcfg["width"]`` temporarily set to
    ``depth``; everything else (monitors, sources, RNGs) uses ``bitwidth``.
    Both operands are streamed for ``2**bitwidth`` cycles and the errors
    tracked by the ``ProgError`` monitors are printed for each mode.
    """
    bitwidth = 12
    depth = 4
    levels = 2**bitwidth

    hwcfg = {
        "width": bitwidth,
        "mode": "bipolar",
        "dimr": 1,
        "dima": 0,
        "rng": "sobol",
        "scale": 1,
        "depth": 10,
        "entry": None,
        "static": False
    }
    swcfg = {"rtype": torch.float, "stype": torch.float, "btype": torch.float}

    col = 100

    def draw_unipolar():
        # uniform in [0, 1], rounded to a multiple of 1/levels
        return torch.rand(col).mul(levels).round().div(levels).to(device)

    def draw_bipolar():
        # uniform in [-1, 1], rounded to a multiple of 1/levels
        return torch.rand(col).mul(2).sub(1).mul(levels).round().div(levels).to(device)

    for mode in ["bipolar", "unipolar"]:
        if mode == "unipolar":
            input_prob_0 = draw_unipolar()
            input_prob_1 = draw_unipolar()
        elif mode == "bipolar":
            input_prob_0 = draw_bipolar()
            input_prob_1 = draw_bipolar()

        hwcfg["mode"] = mode

        # the DUT alone is built with width == depth
        hwcfg["width"] = depth
        dut_mul = FSUMul(None, hwcfg, swcfg).to(device)
        hwcfg["width"] = bitwidth

        product = torch.mul(input_prob_0, input_prob_1)
        oVec = product.mul(levels).round().div(levels).to(device)

        prob_0_PE = ProgError(input_prob_0, hwcfg).to(device)
        prob_1_PE = ProgError(input_prob_1, hwcfg).to(device)
        oVecPE = ProgError(oVec, hwcfg).to(device)

        prob_0_Source = BinGen(input_prob_0, hwcfg, swcfg)().to(device)
        prob_1_Source = BinGen(input_prob_1, hwcfg, swcfg)().to(device)

        iVecRNG0 = RNG(hwcfg, swcfg)().to(device)
        iVecRNG1 = RNG(hwcfg, swcfg)().to(device)
        prob_0_BS = BSGen(prob_0_Source, iVecRNG0, swcfg).to(device)
        prob_1_BS = BSGen(prob_1_Source, iVecRNG1, swcfg).to(device)

        with torch.no_grad():
            start_time = time.time()
            base_idx = torch.zeros(input_prob_0.size()).type(torch.long).to(device)
            for cycle in range(levels):
                iBS_0 = prob_0_BS(base_idx + cycle)
                iBS_1 = prob_1_BS(base_idx + cycle)

                prob_0_PE.Monitor(iBS_0)
                prob_1_PE.Monitor(iBS_1)

                oVecU = dut_mul(iBS_0, iBS_1)
                oVecPE.Monitor(oVecU)
            print("--- %s seconds ---" % (time.time() - start_time))
            print(mode)
            print("input 0 error: ", "min:", torch.min(prob_0_PE()[1]),
                  "max:", torch.max(prob_0_PE()[1]))
            print("input 1 error: ", "min:", torch.min(prob_1_PE()[1]),
                  "max:", torch.max(prob_1_PE()[1]))
            print("output error: ", "min:", torch.min(oVecPE()[1]),
                  "max:", torch.max(oVecPE()[1]),
                  "rmse:", torch.sqrt(torch.mean(torch.mul(oVecPE()[1], oVecPE()[1]))),
                  "bias:", torch.mean(oVecPE()[1]))
def test_fsusignabs():
    """Sweep ``FSUSignAbs`` over its whole input range and print error statistics.

    Inputs are the integers low_bound..up_bound divided by ``up_bound``; the
    reference is ``torch.abs``. Only the second value returned by the DUT is
    monitored. The simulation is repeated ``total_cnt`` times.
    """
    total_cnt = 5
    hwcfg = {
        "width": 8,
        "mode": "bipolar",
        "dimr": 1,
        "rng": "sobol",
        "scale": 1,
        "depth": 5
    }
    swcfg = {"rtype": torch.float, "stype": torch.float, "btype": torch.float}

    bitwidth = hwcfg["width"]
    mode = hwcfg["mode"]
    cycle_cnt = 2**bitwidth

    banner = "========================================================"
    print(banner)
    print(mode)
    print(banner)

    # unipolar sweeps [0, 2**width]; bipolar sweeps the signed range
    # [-2**(width-1), 2**(width-1)]
    low_bound = 0
    if mode == "unipolar":
        up_bound = 2**bitwidth
    elif mode == "bipolar":
        low_bound = -2**(bitwidth - 1)
        up_bound = 2**(bitwidth - 1)

    sweep = list(range(low_bound, up_bound + 1))
    operand = torch.tensor(sweep).type(torch.float).div(up_bound).to(device)
    expected = torch.abs(operand).to(device)

    collected = []
    for _ in range(total_cnt):
        outputPE = ProgError(expected, hwcfg).to(device)

        inputPE = ProgError(operand, hwcfg).to(device)
        inputSRC = BinGen(operand, hwcfg, swcfg)().to(device)

        dut = FSUSignAbs(hwcfg, swcfg).to(device)

        inputRNG = RNG(hwcfg, swcfg)().to(device)
        inputBS = BSGen(inputSRC, inputRNG, swcfg).to(device)

        with torch.no_grad():
            for cycle in range(cycle_cnt):
                in_bit = inputBS(torch.tensor([cycle]))
                inputPE.Monitor(in_bit)

                # the first returned value is not checked here
                _, out_bit = dut(in_bit)
                outputPE.Monitor(out_bit)

        # error of this repetition
        collected.append(outputPE()[1].cpu().numpy())

    result_pe_total = np.array(collected)

    #######################################################################
    # error statistics over all repetitions
    #######################################################################
    print("RMSE:{:1.4}".format(np.sqrt(np.mean(result_pe_total**2))))
    print("MAE: {:1.4}".format(np.mean(np.abs(result_pe_total))))
    print("bias:{:1.4}".format(np.mean(result_pe_total)))
    print("max: {:1.4}".format(np.max(result_pe_total)))
    print("min: {:1.4}".format(np.min(result_pe_total)))
def __init__(self,
             in_features,
             out_features,
             bias=True,
             weight_ext=None,
             bias_ext=None,
             hwcfg={
                 "width": 8,
                 "mode": "bipolar",
                 "rng": "Sobol",
                 "dimr": 1
             },
             swcfg={
                 "btype": torch.float,
                 "rtype": torch.float,
                 "stype": torch.float
             }):
    """Set up weight/bias bit stream generators for the unary linear layer.

    Args:
        in_features, out_features, bias: Forwarded to the parent constructor.
        weight_ext: Optional weight of shape (out_features, in_features);
            when given it is passed through ``BinGen`` and stored as
            ``self.weight.data``.
        bias_ext: Optional bias whose first dimension equals
            ``out_features``; only used when ``bias`` is truthy.
        hwcfg: Hardware config; keys "width", "mode", "rng", "dimr" are read.
        swcfg: Software config; keys "btype", "rtype", "stype" are read.

    Raises:
        AssertionError: On an unsupported mode or a shape mismatch of
            ``weight_ext`` / ``bias_ext``.

    NOTE: the dict defaults are shared between calls; they are only read
    here, never mutated.
    """
    super(FSULinearPC, self).__init__(in_features, out_features, bias=bias)

    # Keep private, normalized copies of the configs.
    self.hwcfg = {}
    self.hwcfg["width"] = hwcfg["width"]
    self.hwcfg["mode"] = hwcfg["mode"].lower()
    self.hwcfg["rng"] = hwcfg["rng"].lower()
    self.hwcfg["dimr"] = hwcfg["dimr"]

    self.swcfg = {}
    self.swcfg["btype"] = swcfg["btype"]
    self.swcfg["rtype"] = swcfg["rtype"]
    self.swcfg["stype"] = swcfg["stype"]

    self.mode = hwcfg["mode"].lower()
    assert self.mode in ["unipolar", "bipolar"], \
        "Error: the hw config 'mode' in " + str(self) + " class requires one of ['unipolar', 'bipolar']."

    # bias indication for original linear layer
    self.has_bias = bias

    # RNG for weight
    hwcfg_wrng = {
        "width": hwcfg["width"],
        "rng": hwcfg["rng"],
        "dimr": hwcfg["dimr"]
    }
    self.wrng = RNG(hwcfg_wrng, swcfg)()
    self.wtc = hwcfg["rng"].lower() in ["race", "tc", "race10", "tc10"]

    # define the linear weight and bias
    if weight_ext is not None:
        assert (weight_ext.size()[0], weight_ext.size()[1]) == (out_features, in_features), \
            "Error: the hw config 'out_features, in_features' in " + str(self) + " class unmatches the binary weight shape."
        self.weight.data = BinGen(weight_ext, self.hwcfg, self.swcfg)()

    if bias and (bias_ext is not None):
        assert bias_ext.size()[0] == out_features, \
            "Error: the hw config 'out_features' in " + str(self) + " class unmatches the binary bias shape."
        self.bias.data = BinGen(bias_ext, self.hwcfg, self.swcfg)()

    if bias:
        # RNG for bias, same as RNG for weight. It is created whenever the
        # layer has a bias (not only when bias_ext is supplied), because the
        # bias bit stream generator below needs it in both cases.
        hwcfg_brng = {
            "width": hwcfg["width"],
            "rng": hwcfg["rng"],
            "dimr": hwcfg["dimr"]
        }
        self.brng = RNG(hwcfg_brng, swcfg)()

    # define the kernel linear for input bit 1
    self.wbsg_i1 = BSGen(self.weight, self.wrng, swcfg)
    self.wrdx_i1 = torch.nn.Parameter(
        torch.zeros_like(self.weight, dtype=torch.long),
        requires_grad=False).unsqueeze(0)
    if self.has_bias is True:
        self.bbsg = BSGen(self.bias, self.brng, swcfg)
        self.brdx = torch.nn.Parameter(
            torch.zeros_like(self.bias, dtype=torch.long),
            requires_grad=False)

    # if bipolar, define a kernel for input bit 0, note that there is no bias required for this kernel
    if (self.mode == "bipolar") and (self.wtc is False):
        self.wbsg_i0 = BSGen(self.weight, self.wrng, swcfg)
        self.wrdx_i0 = torch.nn.Parameter(
            torch.zeros_like(self.weight, dtype=torch.long),
            requires_grad=False).unsqueeze(0)