Ejemplo n.º 1
0
 def __init__(self,
              SimConfig_path,
              indata=0,
              rdata=0,
              outprecision=8,
              default_inbuf_size=16,
              default_outbuf_size=4,
              default_inchannel=64,
              default_size=9):
     """Build the pooling latency model and store each stage latency on self.

     Args:
         SimConfig_path: configuration file path handed to ``Pooling`` and
             ``buffer``.
         indata: volume of input data for pooling (Byte); written into the
             input buffer.
         rdata: volume of data moved from the buffer to iReg (Byte); read
             back from the input buffer.
         outprecision: used as ``default_inchannel * outprecision / 8`` to
             size the output-buffer write (presumably bits per output
             value -- confirm against callers).
         default_inbuf_size: default PE-level input buffer size (unit: KB).
         default_outbuf_size: default Tile-level output buffer size
             (unit: KB).
         default_inchannel: forwarded to the pooling unit as ``inchannel``.
         default_size: forwarded to the pooling unit as ``insize``.
     """
     self.pooling = Pooling(SimConfig_path=SimConfig_path)

     # Input side: one write of `indata` followed by one read of `rdata`.
     input_buffer = buffer(SimConfig_path=SimConfig_path,
                           buf_level=1,
                           default_buf_size=default_inbuf_size)
     self.inbuf = input_buffer
     input_buffer.calculate_buf_write_latency(indata)
     self.inbuf_wlatency = input_buffer.buf_wlatency  # unit: ns
     input_buffer.calculate_buf_read_latency(rdata)
     self.inbuf_rlatency = input_buffer.buf_rlatency

     # Digital pooling stage.
     self.pooling.calculate_Pooling_latency(inchannel=default_inchannel,
                                            insize=default_size)
     self.digital_latency = self.pooling.Pooling_latency

     # Output side: only the write is modelled, the read latency is fixed at 0.
     output_volume = default_inchannel * outprecision / 8
     output_buffer = buffer(SimConfig_path=SimConfig_path,
                            buf_level=2,
                            default_buf_size=default_outbuf_size)
     self.outbuf = output_buffer
     output_buffer.calculate_buf_write_latency(wdata=output_volume)
     self.outbuf_rlatency = 0
     self.outbuf_wlatency = output_buffer.buf_wlatency

     # End-to-end latency is the plain sum of the five stages.
     self.pooling_latency = (self.inbuf_wlatency
                             + self.inbuf_rlatency
                             + self.digital_latency
                             + self.outbuf_rlatency
                             + self.outbuf_wlatency)
Ejemplo n.º 2
0
 def calculate_PE_area(self, SimConfig_path=None, default_inbuf_size=16):
     """Compute the area breakdown of one PE and store it on ``self``.

     unit: um^2

     Args:
         SimConfig_path: configuration file path handed to ``buffer``.
         default_inbuf_size: size passed to the PE-level input buffer as
             ``default_buf_size``.

     Sets ``PE_xbar_area``, ``PE_ADC_area``, ``PE_DAC_area``, the individual
     digital component areas, ``PE_digital_area``, ``PE_inbuf_area`` and the
     overall ``PE_area``. Returns nothing.
     """
     # The input buffer is rebuilt on every call so the requested size is used.
     input_buffer = buffer(SimConfig_path=SimConfig_path,
                           buf_level=1,
                           default_buf_size=default_inbuf_size)
     self.inbuf = input_buffer
     input_buffer.calculate_buf_area()

     # Refresh the per-unit areas of every component before scaling them.
     self.calculate_xbar_area()
     self.calculate_demux_area()
     self.calculate_mux_area()
     self.calculate_DAC_area()
     self.calculate_ADC_area()
     self.PE_adder.calculate_adder_area()
     self.PE_shiftreg.calculate_shiftreg_area()
     self.PE_iReg.calculate_reg_area()
     self.PE_oReg.calculate_reg_area()

     # Analog part: crossbars and converters.
     self.PE_xbar_area = self.PE_xbar_num * self.xbar_area
     self.PE_ADC_area = self.ADC_area * self.PE_ADC_num
     self.PE_DAC_area = self.DAC_area * self.PE_DAC_num

     # Digital part: ADC-side units scale with PE_ADC_num, DAC-side units
     # with PE_DAC_num; adders scale with PE_group_ADC_num * PE_adder_num.
     self.PE_adder_area = (self.PE_group_ADC_num * self.PE_adder_num
                           * self.PE_adder.adder_area)
     self.PE_shiftreg_area = self.PE_ADC_num * self.PE_shiftreg.shiftreg_area
     self.PE_iReg_area = self.PE_DAC_num * self.PE_iReg.reg_area
     self.PE_oReg_area = self.PE_ADC_num * self.PE_oReg.reg_area
     self.PE_input_demux_area = self.input_demux_area * self.PE_DAC_num
     self.PE_output_mux_area = self.output_mux_area * self.PE_ADC_num
     self.PE_digital_area = (self.PE_adder_area
                             + self.PE_shiftreg_area
                             + self.PE_input_demux_area
                             + self.PE_output_mux_area
                             + self.PE_iReg_area
                             + self.PE_oReg_area)

     self.PE_inbuf_area = input_buffer.buf_area
     self.PE_area = (self.PE_xbar_area
                     + self.PE_ADC_area
                     + self.PE_DAC_area
                     + self.PE_digital_area
                     + self.PE_inbuf_area)
Ejemplo n.º 3
0
 def __init__(self, SimConfig_path, read_row=0, read_column=0, indata=0, rdata=0, inprecision = 8,
              PE_num=0, default_inbuf_size = 16, default_outbuf_size =4):
     """Build the tile-level latency model on top of the PE-level one.

     Args:
         SimConfig_path: configuration file path; the 'Tile level' section
             is read here and the path is forwarded to the PE model and to
             ``buffer``.
         read_row: activated WL number in crossbar.
         read_column: activated BL number in crossbar.
         indata: volume of input data (for PE) (Byte).
         rdata: volume of data from buffer to iReg (Byte).
         inprecision: input data precision of each Xbar.
         PE_num: used PE number in one tile. ``0`` (the default) is treated
             as "nothing to merge" and yields zero merge stages.
         default_inbuf_size: the default PE-level input buffer size (unit: KB).
         default_outbuf_size: the default Tile-level output buffer size (unit: KB).

     Raises:
         ValueError: if ``PE_num`` is negative.
         AssertionError: if the configured PE grid is not positive or
             ``PE_num`` exceeds the number of PEs in the grid.
     """
     PE_latency_analysis.__init__(self, SimConfig_path, read_row=read_row, read_column=read_column,
                                  indata=indata, rdata=rdata, inprecision=inprecision, default_buf_size = default_inbuf_size)
     tilel_config = cp.ConfigParser()
     tilel_config.read(SimConfig_path, encoding='UTF-8')
     self.intra_tile_bandwidth = float(tilel_config.get('Tile level', 'Intra_Tile_Bandwidth'))
     # math.log2 is undefined for values <= 0, so the default PE_num=0 used to
     # abort with "math domain error". Zero PEs need no merging; a negative
     # count is still rejected with the same exception type as before.
     if PE_num < 0:
         raise ValueError("PE_num must be non-negative, got %r" % (PE_num,))
     merge_time = math.ceil(math.log2(PE_num)) if PE_num > 0 else 0
     self.tile_PE_num = list(map(int, tilel_config.get('Tile level', 'PE_Num').split(',')))
     # A leading 0 in the configured PE_Num selects the default 4x4 PE grid.
     if self.tile_PE_num[0] == 0:
         self.tile_PE_num[0] = 4
         self.tile_PE_num[1] = 4
     assert self.tile_PE_num[0] > 0, "PE number in one tile must be > 0"
     assert self.tile_PE_num[1] > 0, "PE number in one tile must be > 0"
     self.tile_PE_total_num = self.tile_PE_num[0] * self.tile_PE_num[1]
     assert PE_num <= self.tile_PE_total_num, "PE number exceeds the range"
     self.outbuf = buffer(SimConfig_path=SimConfig_path, buf_level=2, default_buf_size=default_outbuf_size)
     # total_level is derived from the whole grid, merge_time from the PEs in use.
     total_level = math.ceil(math.log2(self.tile_PE_total_num))
     # self.digital_period and self.PE are not set in this method; presumably
     # they come from PE_latency_analysis.__init__ -- confirm.
     self.jointmodule_latency = merge_time * self.digital_period
     self.transfer_latency = (total_level*(self.PE.ADC_precision+merge_time)-merge_time*(merge_time+1)/2)\
                             *read_column/self.intra_tile_bandwidth
     # Output volume in bytes: (ADC_precision + merge_time) * read_column * PE_num / 8.
     self.outbuf.calculate_buf_write_latency(wdata=((self.PE.ADC_precision + merge_time)*read_column*PE_num/8))
     # The tile buffer read latency is not considered, hence fixed at 0 and
     # left out of the total below.
     self.tile_buf_rlatency = 0
     self.tile_buf_wlatency = self.outbuf.buf_wlatency
     self.tile_latency = self.PE_latency + self.jointmodule_latency + self.transfer_latency + self.tile_buf_wlatency
Ejemplo n.º 4
0
 def calculate_PE_read_power_fast(self,
                                  max_column=0,
                                  max_row=0,
                                  max_group=0,
                                  SimConfig_path=None,
                                  default_inbuf_size=16):
     """Coarse but fast estimate of the PE read power, stored on ``self``.

     unit: W

     Args:
         max_column: maximum used column in one crossbar in this tile.
         max_row: maximum used row in one crossbar in this tile.
         max_group: maximum used groups in one PE.
         SimConfig_path: configuration file path handed to ``buffer``.
         default_inbuf_size: size passed to the PE-level input buffer as
             ``default_buf_size``.

     Sets the ``PE_*_read_power`` attributes, ``input_demux_read_power``,
     ``output_mux_read_power`` and the overall ``PE_read_power``.
     """
     input_buffer = buffer(SimConfig_path=SimConfig_path,
                           buf_level=1,
                           default_buf_size=default_inbuf_size)
     self.inbuf = input_buffer
     input_buffer.calculate_buf_read_power()
     input_buffer.calculate_buf_write_power()

     # Refresh the per-unit power of every component.
     self.calculate_DAC_power()
     self.calculate_ADC_power()
     self.calculate_demux_power()
     self.calculate_mux_power()
     self.PE_shiftreg.calculate_shiftreg_power()
     self.PE_iReg.calculate_reg_power()
     self.PE_oReg.calculate_reg_power()
     self.PE_adder.calculate_adder_power()

     # Clear every result before the crossbar is reconfigured.
     self.PE_read_power = self.PE_xbar_read_power = 0
     self.PE_ADC_read_power = self.PE_DAC_read_power = 0
     self.PE_adder_read_power = self.PE_shiftreg_read_power = 0
     self.PE_iReg_read_power = self.PE_oReg_read_power = 0
     self.input_demux_read_power = self.output_mux_read_power = 0
     self.PE_digital_read_power = 0

     self.xbar_read_config(read_row=max_row, read_column=max_column)
     self.calculate_xbar_read_power()
     self.PE_xbar_read_power = (self.PE_multiplex_xbar_num[1] * max_group
                                * self.xbar_read_power
                                / self.input_demux / self.output_mux)

     # Row-side components scale with ceil(max_row / input_demux),
     # column-side components with ceil(max_column / output_mux).
     row_units = math.ceil(max_row / self.input_demux)
     column_units = math.ceil(max_column / self.output_mux)

     self.PE_DAC_read_power = max_group * row_units * self.DAC_power
     self.PE_ADC_read_power = max_group * column_units * self.ADC_power
     self.input_demux_read_power = max_group * row_units * self.input_demux_power
     self.output_mux_read_power = max_group * column_units * self.output_mux_power
     # The adder term uses (max_group - 1) rather than max_group.
     self.PE_adder_read_power = ((max_group - 1) * column_units
                                 * self.PE_adder.adder_power)
     self.PE_shiftreg_read_power = (max_group * column_units
                                    * self.PE_shiftreg.shiftreg_power)
     self.PE_iReg_read_power = max_group * row_units * self.PE_iReg.reg_power
     self.PE_oReg_read_power = max_group * column_units * self.PE_oReg.reg_power

     self.PE_digital_read_power = (self.input_demux_read_power
                                   + self.output_mux_read_power
                                   + self.PE_adder_read_power
                                   + self.PE_shiftreg_read_power
                                   + self.PE_iReg_read_power
                                   + self.PE_oReg_read_power)

     # Buffer powers are scaled by 1e-3 (presumably mW -> W -- confirm).
     self.PE_inbuf_read_rpower = input_buffer.buf_rpower * 1e-3
     self.PE_inbuf_read_wpower = input_buffer.buf_wpower * 1e-3
     self.PE_inbuf_read_power = (self.PE_inbuf_read_rpower
                                 + self.PE_inbuf_read_wpower)

     self.PE_read_power = (self.PE_xbar_read_power
                           + self.PE_DAC_read_power
                           + self.PE_ADC_read_power
                           + self.PE_digital_read_power
                           + self.PE_inbuf_read_power)
Ejemplo n.º 5
0
    def calculate_model_area(self):
        """Fill the per-layer and total area figures of the mapped model.

        The tile area is computed once and multiplied by each layer's
        ``tilenum``. The global buffer is added to the buffer total, and the
        global adders to the digital and adder totals.

        TODO: NoC area is not included.
        """
        tile_model = self.graph.tile
        tile_model.calculate_tile_area(
            SimConfig_path=self.SimConfig_path,
            default_inbuf_size=self.graph.max_inbuf_size,
            default_outbuf_size=self.graph.max_outbuf_size)

        self.global_buf = buffer(SimConfig_path=self.SimConfig_path,
                                 buf_level=1,
                                 default_buf_size=self.graph.global_buf_size)
        self.global_buf.calculate_buf_area()
        self.global_add = adder(SimConfig_path=self.SimConfig_path,
                                bitwidth=self.graph.global_adder_bitwidth)
        self.global_add.calculate_adder_area()

        # (per-layer container on self, per-tile figure on the tile model)
        per_layer_fields = (
            ('arch_area', 'tile_area'),
            ('arch_xbar_area', 'tile_xbar_area'),
            ('arch_ADC_area', 'tile_ADC_area'),
            ('arch_DAC_area', 'tile_DAC_area'),
            ('arch_digital_area', 'tile_digital_area'),
            ('arch_adder_area', 'tile_adder_area'),
            ('arch_shiftreg_area', 'tile_shiftreg_area'),
            ('arch_iReg_area', 'tile_iReg_area'),
            ('arch_oReg_area', 'tile_oReg_area'),
            ('arch_input_demux_area', 'tile_input_demux_area'),
            ('arch_output_mux_area', 'tile_output_mux_area'),
            ('arch_jointmodule_area', 'tile_jointmodule_area'),
            ('arch_buf_area', 'tile_buffer_area'),
            ('arch_pooling_area', 'tile_pooling_area'),
        )
        for layer in range(self.total_layer_num):
            layer_tiles = self.graph.layer_tileinfo[layer]['tilenum']
            for arch_name, tile_name in per_layer_fields:
                getattr(self, arch_name)[layer] = getattr(
                    tile_model, tile_name) * layer_tiles

        # Totals that are a plain sum over the layers.
        plain_totals = (
            ('arch_total_area', 'arch_area'),
            ('arch_total_xbar_area', 'arch_xbar_area'),
            ('arch_total_ADC_area', 'arch_ADC_area'),
            ('arch_total_DAC_area', 'arch_DAC_area'),
            ('arch_total_shiftreg_area', 'arch_shiftreg_area'),
            ('arch_total_iReg_area', 'arch_iReg_area'),
            ('arch_total_oReg_area', 'arch_oReg_area'),
            ('arch_total_input_demux_area', 'arch_input_demux_area'),
            ('arch_total_output_mux_area', 'arch_output_mux_area'),
            ('arch_total_jointmodule_area', 'arch_jointmodule_area'),
            ('arch_total_pooling_area', 'arch_pooling_area'),
        )
        for total_name, layer_name in plain_totals:
            setattr(self, total_name, sum(getattr(self, layer_name)))

        # Totals that also carry a global (non-tile) contribution.
        global_adder_area = self.global_add.adder_area * self.graph.global_adder_num
        self.arch_total_digital_area = sum(self.arch_digital_area) + global_adder_area
        self.arch_total_adder_area = sum(self.arch_adder_area) + global_adder_area
        self.arch_total_buf_area = sum(self.arch_buf_area) + self.global_buf.buf_area
Ejemplo n.º 6
0
    def calculate_tile_area(self,
                            SimConfig_path=None,
                            default_inbuf_size=16,
                            default_outbuf_size=4):
        """Compute the area breakdown of one tile and store it on ``self``.

        unit: um^2

        Args:
            SimConfig_path: configuration file path handed to ``buffer`` and
                to every PE's ``calculate_PE_area``.
            default_inbuf_size: input buffer size forwarded to each PE.
            default_outbuf_size: size passed to the tile-level buffer as
                ``default_buf_size``.

        Sets the ``tile_*_area`` attributes, including the overall
        ``tile_area``. Returns nothing.
        """
        # Every accumulator restarts from zero on each call.
        self.tile_area = self.tile_xbar_area = 0
        self.tile_ADC_area = self.tile_DAC_area = 0
        self.tile_input_demux_area = self.tile_output_mux_area = 0
        self.tile_shiftreg_area = 0
        self.tile_iReg_area = self.tile_oReg_area = 0
        self.tile_adder_area = self.tile_buffer_area = 0
        self.tile_digital_area = 0

        # Refresh the tile-level components.
        self.tile_adder.calculate_adder_area()
        self.tile_shiftreg.calculate_shiftreg_area()
        self.tile_iReg.calculate_reg_area()
        self.tile_oReg.calculate_reg_area()
        self.tile_jointmodule.calculate_jointmodule_area()
        output_buffer = buffer(SimConfig_path=SimConfig_path,
                               buf_level=2,
                               default_buf_size=default_outbuf_size)
        self.tile_buffer = output_buffer
        output_buffer.calculate_buf_area()
        self.tile_pooling.calculate_Pooling_area()

        # Accumulate the contribution of every PE in the grid.
        for row in range(self.tile_PE_num[0]):
            for col in range(self.tile_PE_num[1]):
                pe = self.tile_PE_list[row][col]
                pe.calculate_PE_area(SimConfig_path=SimConfig_path,
                                     default_inbuf_size=default_inbuf_size)
                self.tile_xbar_area += pe.PE_xbar_area
                self.tile_ADC_area += pe.PE_ADC_area
                self.tile_DAC_area += pe.PE_DAC_area
                self.tile_input_demux_area += pe.PE_input_demux_area
                self.tile_output_mux_area += pe.PE_output_mux_area
                self.tile_shiftreg_area += pe.PE_shiftreg_area
                self.tile_iReg_area += pe.PE_iReg_area
                self.tile_oReg_area += pe.PE_oReg_area
                self.tile_adder_area += pe.PE_adder_area
                self.tile_buffer_area += pe.PE_inbuf_area

        # The tile-level adder and shift-register areas are refreshed above
        # but deliberately not added to the totals in this version; only the
        # joint modules contribute at tile level.
        self.tile_jointmodule_area = (self.tile_jointmodule_num
                                      * self.tile_jointmodule.jointmodule_area)
        self.tile_digital_area = (self.tile_input_demux_area
                                  + self.tile_output_mux_area
                                  + self.tile_adder_area
                                  + self.tile_shiftreg_area
                                  + self.tile_jointmodule_area
                                  + self.tile_iReg_area
                                  + self.tile_oReg_area)
        self.tile_pooling_area = self.tile_pooling.Pooling_area
        # PE input buffers plus the single tile-level buffer.
        self.tile_buffer_area += output_buffer.buf_area
        self.tile_area = (self.tile_xbar_area
                          + self.tile_ADC_area
                          + self.tile_DAC_area
                          + self.tile_digital_area
                          + self.tile_buffer_area
                          + self.tile_pooling_area)
Ejemplo n.º 7
0
 def calculate_model_energy(self):
     """Fill the per-layer and total energy figures of the mapped model.

     Each per-layer energy is the matching per-layer power from
     ``self.model_power`` multiplied by the matching per-layer latency from
     ``self.model_latency``. Totals are sums over the layers; the global
     adders, the global buffer and ``self.arch_Noc_energy`` are added where
     shown below. Returns nothing.
     """
     self.global_buf = buffer(SimConfig_path=self.SimConfig_path,
                              buf_level=1,
                              default_buf_size=self.graph.global_buf_size)
     self.global_buf.calculate_buf_read_power()
     self.global_buf.calculate_buf_write_power()
     self.global_add = adder(SimConfig_path=self.SimConfig_path,
                             bitwidth=self.graph.global_adder_bitwidth)
     self.global_add.calculate_adder_power()

     power = self.model_power
     latency = self.model_latency
     for i in range(self.total_layer_num):
         self.arch_xbar_energy[i] = power.arch_xbar_power[i] * latency.total_xbar_latency[i]
         self.arch_ADC_energy[i] = power.arch_ADC_power[i] * latency.total_ADC_latency[i]
         self.arch_DAC_energy[i] = power.arch_DAC_power[i] * latency.total_DAC_latency[i]
         self.arch_adder_energy[i] = power.arch_adder_power[i] * latency.total_adder_latency[i]
         self.arch_shiftreg_energy[i] = power.arch_shiftreg_power[i] * latency.total_shiftreg_latency[i]
         self.arch_iReg_energy[i] = power.arch_iReg_power[i] * latency.total_iReg_latency[i]
         self.arch_oReg_energy[i] = power.arch_oReg_power[i] * latency.total_oReg_latency[i]
         self.arch_input_demux_energy[i] = power.arch_input_demux_power[i] * latency.total_input_demux_latency[i]
         self.arch_output_mux_energy[i] = power.arch_output_mux_power[i] * latency.total_output_mux_latency[i]
         self.arch_jointmodule_energy[i] = power.arch_jointmodule_power[i] * latency.total_jointmodule_latency[i]
         self.arch_buf_r_energy[i] = power.arch_buf_r_power[i] * latency.total_buffer_r_latency[i]
         self.arch_buf_w_energy[i] = power.arch_buf_w_power[i] * latency.total_buffer_w_latency[i]
         self.arch_buf_energy[i] = self.arch_buf_r_energy[i] + self.arch_buf_w_energy[i]
         self.arch_pooling_energy[i] = power.arch_pooling_power[i] * latency.total_pooling_latency[i]
         # NOTE(review): the per-layer digital energy leaves out the adder
         # energy, so arch_energy[i] does not contain it either -- confirm
         # this is intended.
         self.arch_digital_energy[i] = self.arch_shiftreg_energy[i]+self.arch_iReg_energy[i]+self.arch_oReg_energy[i]+\
                                       self.arch_input_demux_energy[i]+self.arch_output_mux_energy[i]+self.arch_jointmodule_energy[i]
         self.arch_energy[i] = self.arch_xbar_energy[i]+self.arch_ADC_energy[i]+self.arch_DAC_energy[i]+\
                               self.arch_digital_energy[i]+self.arch_buf_energy[i]+self.arch_pooling_energy[i]

     # Contributions that do not belong to any tile.
     global_adder_energy = self.global_add.adder_power*self.graph.global_adder_num*self.global_add.adder_latency
     # Buffer powers are scaled by 1e-3 (presumably mW -> W -- confirm).
     # NOTE(review): no latency calculation is invoked on global_buf in this
     # method, so buf_rlatency / buf_wlatency hold whatever buffer() set.
     global_buf_r_energy = self.global_buf.buf_rpower * 1e-3 * self.global_buf.buf_rlatency
     global_buf_w_energy = self.global_buf.buf_wpower * 1e-3 * self.global_buf.buf_wlatency

     self.arch_total_energy = sum(self.arch_energy) + self.arch_Noc_energy
     self.arch_total_xbar_energy = sum(self.arch_xbar_energy)
     self.arch_total_ADC_energy = sum(self.arch_ADC_energy)
     self.arch_total_DAC_energy = sum(self.arch_DAC_energy)
     self.arch_total_digital_energy = sum(self.arch_digital_energy) + global_adder_energy
     self.arch_total_adder_energy = sum(self.arch_adder_energy) + global_adder_energy
     self.arch_total_shiftreg_energy = sum(self.arch_shiftreg_energy)
     self.arch_total_iReg_energy = sum(self.arch_iReg_energy)
     # Previously missing: the oReg total was never derived from the
     # per-layer values computed above.
     self.arch_total_oReg_energy = sum(self.arch_oReg_energy)
     self.arch_total_input_demux_energy = sum(self.arch_input_demux_energy)
     self.arch_total_output_mux_energy = sum(self.arch_output_mux_energy)
     self.arch_total_jointmodule_energy = sum(self.arch_jointmodule_energy)
     self.arch_total_buf_energy = sum(self.arch_buf_energy) + global_buf_r_energy + global_buf_w_energy
     self.arch_total_buf_r_energy = sum(self.arch_buf_r_energy) + global_buf_r_energy
     self.arch_total_buf_w_energy = sum(self.arch_buf_w_energy) + global_buf_w_energy
     self.arch_total_pooling_energy = sum(self.arch_pooling_energy)
Ejemplo n.º 8
0
    def calculate_model_area(self):
        """Fill the area figures and the xbar/DAC/ADC utilization of the model.

        Per-layer areas are the tile area figures multiplied by the layer's
        ``tilenum``. Utilization is only evaluated for 'conv' and 'fc'
        layers. When the model contains no such layer the total utilization
        figures are reported as 0 instead of dividing by zero.

        TODO: NoC area is not included.
        """
        self.graph.tile.calculate_tile_area(
            SimConfig_path=self.SimConfig_path,
            default_inbuf_size=self.graph.max_inbuf_size,
            default_outbuf_size=self.graph.max_outbuf_size)
        self.global_buf = buffer(SimConfig_path=self.SimConfig_path,
                                 buf_level=1,
                                 default_buf_size=self.graph.global_buf_size)
        self.global_buf.calculate_buf_area()
        self.global_add = adder(SimConfig_path=self.SimConfig_path,
                                bitwidth=self.graph.global_adder_bitwidth)
        self.global_add.calculate_adder_area()

        # Per-tile capacities used as utilization denominators.
        # NOTE(review): tile_xbar_num multiplies rows by columns, so it looks
        # like a cell count rather than a crossbar count -- confirm.
        self.tile = tile(SimConfig_path=self.SimConfig_path)
        self.tile_xbar_num = self.tile.tile_PE_total_num * self.tile.group_num * self.tile.xbar_column * self.tile.xbar_row
        self.tile_DAC_num = self.tile.tile_PE_total_num * self.tile.group_num * self.tile.xbar_row
        self.tile_ADC_num = self.tile.tile_PE_total_num * self.tile.group_num * self.tile.xbar_column

        total_tile_num = 0
        used_total_xbar_num = 0
        used_total_DAC_num = 0
        used_total_ADC_num = 0
        # not the real DAC/ADC num, but it reflects the DAC/ADC num

        # Layer-description key that holds the output width of each layer
        # type; the conv and fc handling is otherwise identical.
        output_width_key = {'conv': 'Outputchannel', 'fc': 'Outfeature'}
        tile_model = self.graph.tile
        for i in range(self.total_layer_num):
            layer_dict = self.NetStruct[i][0][0]
            tileinfo = self.graph.layer_tileinfo[i]
            tile_num = tileinfo['tilenum']
            self.arch_area[i] = tile_model.tile_area * tile_num
            self.arch_xbar_area[i] = tile_model.tile_xbar_area * tile_num
            self.arch_ADC_area[i] = tile_model.tile_ADC_area * tile_num
            self.arch_DAC_area[i] = tile_model.tile_DAC_area * tile_num
            self.arch_digital_area[i] = tile_model.tile_digital_area * tile_num
            self.arch_adder_area[i] = tile_model.tile_adder_area * tile_num
            self.arch_shiftreg_area[i] = tile_model.tile_shiftreg_area * tile_num
            self.arch_iReg_area[i] = tile_model.tile_iReg_area * tile_num
            self.arch_oReg_area[i] = tile_model.tile_oReg_area * tile_num
            self.arch_input_demux_area[i] = tile_model.tile_input_demux_area * tile_num
            self.arch_output_mux_area[i] = tile_model.tile_output_mux_area * tile_num
            self.arch_jointmodule_area[i] = tile_model.tile_jointmodule_area * tile_num
            self.arch_buf_area[i] = tile_model.tile_buffer_area * tile_num
            self.arch_pooling_area[i] = tile_model.tile_pooling_area * tile_num

            layer_type = tileinfo['type']
            if layer_type in ('conv', 'fc'):
                # only consider the utilization rate of conv layer and fc layer
                total_tile_num += tile_num
                output_width = int(layer_dict[output_width_key[layer_type]])
                used_xbar_num = tileinfo['x_width'] * tileinfo['y_height']
                used_DAC_num = tileinfo['y_height'] * tileinfo[
                    'weight_precision'] * math.ceil(
                        output_width / self.tile.xbar_column)
                used_ADC_num = tileinfo['x_width'] * tileinfo['my']
                self.arch_xbar_utilization[i] = used_xbar_num / (
                    tile_num * self.tile_xbar_num)
                self.arch_DAC_utilization[i] = used_DAC_num / (
                    tile_num * self.tile_DAC_num)
                self.arch_ADC_utilization[i] = used_ADC_num / (
                    tile_num * self.tile_ADC_num)
                used_total_xbar_num += used_xbar_num
                used_total_DAC_num += used_DAC_num
                used_total_ADC_num += used_ADC_num

        self.arch_total_area = sum(self.arch_area)
        self.arch_total_xbar_area = sum(self.arch_xbar_area)
        self.arch_total_ADC_area = sum(self.arch_ADC_area)
        self.arch_total_DAC_area = sum(self.arch_DAC_area)
        self.arch_total_digital_area = sum(
            self.arch_digital_area
        ) + self.global_add.adder_area * self.graph.global_adder_num
        self.arch_total_adder_area = sum(
            self.arch_adder_area
        ) + self.global_add.adder_area * self.graph.global_adder_num
        self.arch_total_shiftreg_area = sum(self.arch_shiftreg_area)
        self.arch_total_iReg_area = sum(self.arch_iReg_area)
        self.arch_total_oReg_area = sum(self.arch_oReg_area)
        self.arch_total_input_demux_area = sum(self.arch_input_demux_area)
        self.arch_total_output_mux_area = sum(self.arch_output_mux_area)
        self.arch_total_jointmodule_area = sum(self.arch_jointmodule_area)
        self.arch_total_buf_area = sum(
            self.arch_buf_area) + self.global_buf.buf_area
        self.arch_total_pooling_area = sum(self.arch_pooling_area)

        if total_tile_num > 0:
            self.arch_total_xbar_utilization = used_total_xbar_num / (
                total_tile_num * self.tile_xbar_num)
            self.arch_total_DAC_utilization = used_total_DAC_num / (
                total_tile_num * self.tile_DAC_num)
            self.arch_total_ADC_utilization = used_total_ADC_num / (
                total_tile_num * self.tile_ADC_num)
        else:
            # No conv/fc tiles were counted, so there is nothing to rate.
            self.arch_total_xbar_utilization = 0
            self.arch_total_DAC_utilization = 0
            self.arch_total_ADC_utilization = 0
Ejemplo n.º 9
0
 def calculate_model_power(self):
     """Fill the per-layer and total power figures of the mapped model.

     For every layer the tile read power is re-estimated from that layer's
     ``max_column`` / ``max_row`` / ``max_PE`` / ``max_group`` and layer
     type, then multiplied by the layer's ``tilenum``. The global adders are
     added to the digital and adder totals, the global buffer to the buffer
     totals. Returns nothing.
     """
     self.global_buf = buffer(SimConfig_path=self.SimConfig_path,
                              buf_level=1,
                              default_buf_size=self.graph.global_buf_size)
     self.global_buf.calculate_buf_read_power()
     self.global_buf.calculate_buf_write_power()
     self.global_add = adder(SimConfig_path=self.SimConfig_path,
                             bitwidth=self.graph.global_adder_bitwidth)
     self.global_add.calculate_adder_power()

     # (per-layer container on self, per-tile figure on the tile model)
     per_layer_fields = (
         ('arch_power', 'tile_read_power'),
         ('arch_xbar_power', 'tile_xbar_read_power'),
         ('arch_ADC_power', 'tile_ADC_read_power'),
         ('arch_DAC_power', 'tile_DAC_read_power'),
         ('arch_digital_power', 'tile_digital_read_power'),
         ('arch_adder_power', 'tile_adder_read_power'),
         ('arch_shiftreg_power', 'tile_shiftreg_read_power'),
         ('arch_iReg_power', 'tile_iReg_read_power'),
         ('arch_oReg_power', 'tile_oReg_read_power'),
         ('arch_input_demux_power', 'tile_input_demux_read_power'),
         ('arch_output_mux_power', 'tile_output_mux_read_power'),
         ('arch_jointmodule_power', 'tile_jointmodule_read_power'),
         ('arch_buf_power', 'tile_buffer_read_power'),
         ('arch_buf_r_power', 'tile_buffer_r_read_power'),
         ('arch_buf_w_power', 'tile_buffer_w_read_power'),
         ('arch_pooling_power', 'tile_pooling_read_power'),
     )
     tile_model = self.graph.tile
     for layer in range(self.total_layer_num):
         info = self.graph.layer_tileinfo[layer]
         layer_tiles = info['tilenum']
         used_columns = info['max_column']
         used_rows = info['max_row']
         used_PEs = info['max_PE']
         used_groups = info['max_group']
         kind = self.graph.net[layer][0][0]['type']
         # The tile model holds only the figures of the most recent call,
         # so they are copied out before moving on to the next layer.
         tile_model.calculate_tile_read_power_fast(
             max_column=used_columns,
             max_row=used_rows,
             max_PE=used_PEs,
             max_group=used_groups,
             layer_type=kind,
             SimConfig_path=self.SimConfig_path,
             default_inbuf_size=self.graph.max_inbuf_size,
             default_outbuf_size=self.graph.max_outbuf_size)
         for arch_name, tile_name in per_layer_fields:
             getattr(self, arch_name)[layer] = getattr(
                 tile_model, tile_name) * layer_tiles

     # Totals that are a plain sum over the layers.
     plain_totals = (
         ('arch_total_power', 'arch_power'),
         ('arch_total_xbar_power', 'arch_xbar_power'),
         ('arch_total_ADC_power', 'arch_ADC_power'),
         ('arch_total_DAC_power', 'arch_DAC_power'),
         ('arch_total_shiftreg_power', 'arch_shiftreg_power'),
         ('arch_total_iReg_power', 'arch_iReg_power'),
         ('arch_total_oReg_power', 'arch_oReg_power'),
         ('arch_total_input_demux_power', 'arch_input_demux_power'),
         ('arch_total_output_mux_power', 'arch_output_mux_power'),
         ('arch_total_jointmodule_power', 'arch_jointmodule_power'),
         ('arch_total_pooling_power', 'arch_pooling_power'),
     )
     for total_name, layer_name in plain_totals:
         setattr(self, total_name, sum(getattr(self, layer_name)))

     # Totals that also carry a global (non-tile) contribution.
     global_adder_power = self.global_add.adder_power * self.graph.global_adder_num
     self.arch_total_digital_power = sum(self.arch_digital_power) + global_adder_power
     self.arch_total_adder_power = sum(self.arch_adder_power) + global_adder_power
     # Global buffer powers are scaled by 1e-3 (presumably mW -> W -- confirm).
     self.arch_total_buf_power = sum(self.arch_buf_power) + (
         self.global_buf.buf_wpower + self.global_buf.buf_rpower) * 1e-3
     self.arch_total_buf_r_power = sum(
         self.arch_buf_r_power) + self.global_buf.buf_rpower * 1e-3
     self.arch_total_buf_w_power = sum(
         self.arch_buf_w_power) + self.global_buf.buf_wpower * 1e-3
Ejemplo n.º 10
0
 def calculate_tile_read_power_fast(self,
                                    max_column=0,
                                    max_row=0,
                                    max_PE=0,
                                    max_group=0,
                                    layer_type=None,
                                    SimConfig_path=None,
                                    default_inbuf_size=16,
                                    default_outbuf_size=4):
     """Coarse but fast estimate of the tile read power (unit: W).

     Nothing is returned; the results are stored on ``self`` in the
     ``tile_*_read_power`` attributes, and ``self.tile_buffer`` is replaced
     by a freshly built buffer model.

     Args:
         max_column: maximum used column in one crossbar in this tile.
         max_row: maximum used row in one crossbar in this tile.
         max_PE: maximum used PE in this tile.
         max_group: maximum used groups in one PE.
         layer_type: 'pooling', 'conv' or 'fc'. Any other value leaves
             only the tile buffer contribution in the totals.
         SimConfig_path: configuration file handed to the buffer model and
             to the PE-level estimate.
         default_inbuf_size: input buffer size forwarded to the PE-level
             estimate.
         default_outbuf_size: size used to build the tile buffer model.
     """
     # Start from a clean slate so a previous call cannot leak into the sums.
     self.tile_read_power = 0
     self.tile_xbar_read_power = 0
     self.tile_ADC_read_power = 0
     self.tile_DAC_read_power = 0
     self.tile_digital_read_power = 0
     self.tile_adder_read_power = 0
     self.tile_shiftreg_read_power = 0
     self.tile_iReg_read_power = 0
     self.tile_oReg_read_power = 0
     self.tile_input_demux_read_power = 0
     self.tile_output_mux_read_power = 0
     self.tile_jointmodule_read_power = 0
     self.tile_pooling_read_power = 0
     self.tile_buffer_read_power = 0
     self.tile_buffer_r_read_power = 0
     self.tile_buffer_w_read_power = 0
     self.tile_buffer = buffer(SimConfig_path=SimConfig_path,
                               buf_level=2,
                               default_buf_size=default_outbuf_size)

     if layer_type == 'pooling':
         self.tile_pooling.calculate_Pooling_power()
         self.tile_pooling_read_power = self.tile_pooling.Pooling_power
     elif layer_type in ('conv', 'fc'):
         # Fills the PE_*_read_power attributes that are scaled below.
         self.calculate_PE_read_power_fast(
             max_column=max_column,
             max_row=max_row,
             max_group=max_group,
             SimConfig_path=SimConfig_path,
             default_inbuf_size=default_inbuf_size)
         # Every per-PE figure is scaled by the number of PEs in use.
         self.tile_xbar_read_power = max_PE * self.PE_xbar_read_power
         self.tile_ADC_read_power = max_PE * self.PE_ADC_read_power
         self.tile_DAC_read_power = max_PE * self.PE_DAC_read_power
         self.tile_adder_read_power = max_PE * self.PE_adder_read_power
         self.tile_shiftreg_read_power = max_PE * self.PE_shiftreg_read_power
         self.tile_iReg_read_power = max_PE * self.PE_iReg_read_power
         self.tile_oReg_read_power = max_PE * self.PE_oReg_read_power
         self.tile_input_demux_read_power = max_PE * self.input_demux_read_power
         self.tile_output_mux_read_power = max_PE * self.output_mux_read_power
         # The joint-module term scales with (max_PE - 1); clamp it so that
         # max_PE == 0 (the default) cannot produce a negative power.
         joint_stage_factor = max(max_PE - 1, 0)
         self.tile_jointmodule_read_power = joint_stage_factor * math.ceil(
             max_column /
             self.output_mux) * self.tile_jointmodule.jointmodule_power
         self.tile_buffer_r_read_power = max_PE * self.PE_inbuf_read_rpower
         self.tile_buffer_w_read_power = max_PE * self.PE_inbuf_read_wpower

     # The tile buffer contributes for every layer type.
     self.tile_buffer.calculate_buf_read_power()
     self.tile_buffer.calculate_buf_write_power()
     # NOTE(review): the 1e-3 factor presumably converts mW to W -- confirm
     # against the buffer model.
     self.tile_buffer_r_read_power += self.tile_buffer.buf_rpower * 1e-3
     self.tile_buffer_w_read_power += self.tile_buffer.buf_wpower * 1e-3
     self.tile_buffer_read_power = self.tile_buffer_r_read_power + self.tile_buffer_w_read_power

     # Single place where the digital total is formed (includes iReg/oReg).
     self.tile_digital_read_power = self.tile_adder_read_power+self.tile_shiftreg_read_power+self.tile_iReg_read_power+self.tile_oReg_read_power+\
               self.tile_input_demux_read_power+self.tile_output_mux_read_power+self.tile_jointmodule_read_power
     self.tile_read_power = self.tile_xbar_read_power+self.tile_ADC_read_power+self.tile_DAC_read_power+\
             self.tile_digital_read_power+self.tile_pooling_read_power+self.tile_buffer_read_power
Ejemplo n.º 11
0
    def __init__(self, SimConfig_path):
        """Set up a tile: read its configuration, build the PE grid, zero the statistics.

        Args:
            SimConfig_path: path of the configuration file; it is parsed here
                and also handed to every sub-module constructor.
        """
        # layer_num is a list with the size of 1xPE_num
        ProcessElement.__init__(self, SimConfig_path)

        parser = cp.ConfigParser()
        parser.read(SimConfig_path, encoding='UTF-8')

        raw_pe_num = parser.get('Tile level', 'PE_Num')
        self.tile_PE_num = [int(field) for field in raw_pe_num.split(',')]
        if self.tile_PE_num[0] == 0:
            # A leading 0 in the configuration selects a 4 x 4 PE array.
            self.tile_PE_num[0] = 4
            self.tile_PE_num[1] = 4
        assert self.tile_PE_num[0] > 0, "PE number in one PE < 0"
        assert self.tile_PE_num[1] > 0, "PE number in one PE < 0"
        pe_rows = self.tile_PE_num[0]
        pe_cols = self.tile_PE_num[1]
        self.tile_PE_total_num = pe_rows * pe_cols
        self.tile_simulation_level = int(
            parser.get('Algorithm Configuration', 'Simulation_Level'))

        # One independent ProcessElement per grid position, all disabled (0).
        self.tile_PE_list = [[ProcessElement(SimConfig_path)
                              for _ in range(pe_cols)]
                             for _ in range(pe_rows)]
        self.tile_PE_enable = [[0] * pe_cols for _ in range(pe_rows)]

        self.layer_type = 'conv'
        for counter in ('tile_layer_num', 'tile_activation_precision',
                        'tile_sliding_times', 'tile_adder_num',
                        'tile_shiftreg_num', 'tile_jointmodule_num'):
            setattr(self, counter, 0)

        # Tile-level hardware models.
        self.tile_adder = adder(SimConfig_path)
        self.tile_shiftreg = shiftreg(SimConfig_path)
        self.tile_iReg = reg(SimConfig_path)
        self.tile_oReg = reg(SimConfig_path)
        self.tile_jointmodule = JointModule(SimConfig_path)
        self.tile_buffer = buffer(SimConfig_path)
        self.tile_pooling = Pooling(SimConfig_path)

        self.tile_utilization = 0
        self.num_occupied_PE = 0

        # Statistics are named tile_<metric> (total) and
        # tile_<component>_<metric> (per component); all start at 0.
        shared_components = ('xbar', 'ADC', 'DAC', 'digital', 'adder',
                             'shiftreg', 'iReg', 'input_demux',
                             'output_mux', 'jointmodule')
        # Only these two metrics track the extra components listed here.
        extra_components = {
            'area': ('oReg', 'pooling', 'buffer'),
            'read_power': ('oReg', 'pooling', 'buffer', 'buffer_r',
                           'buffer_w'),
        }
        for metric in ('area', 'read_power', 'write_power', 'read_latency',
                       'write_latency', 'read_energy', 'write_energy'):
            setattr(self, 'tile_' + metric, 0)
            for component in shared_components + extra_components.get(metric, ()):
                setattr(self, 'tile_' + component + '_' + metric, 0)

        # print("tile configuration is loaded")
        self.calculate_intra_PE_connection()
Ejemplo n.º 12
0
 def update_tile_buf_size(self, SimConfig_path, default_buf_size=16):
     """Swap the tile buffer model for a new one built with the given size.

     Args:
         SimConfig_path: configuration file passed to the buffer model.
         default_buf_size: size forwarded to the buffer model as its
             ``default_buf_size``.
     """
     resized_buffer = buffer(default_buf_size=default_buf_size,
                             SimConfig_path=SimConfig_path)
     self.tile_buffer = resized_buffer
Ejemplo n.º 13
0
    def __init__(self,
                 SimConfig_path,
                 read_row=0,
                 read_column=0,
                 indata=0,
                 rdata=0,
                 inprecision=8,
                 default_buf_size=16):
        """Compute the latency of one PE operation and keep the per-stage breakdown.

        All results are stored as attributes; the total is ``PE_latency``.

        Args:
            SimConfig_path: path of the configuration file.
            read_row: activated WL number in crossbar.
            read_column: activated BL number in crossbar.
            indata: volume of input data (for PE) (Byte).
            rdata: volume of data from buffer to iReg (Byte).
            inprecision: input data precision of each Xbar.
            default_buf_size: default input buffer size (KB).
        """
        config = cp.ConfigParser()
        config.read(SimConfig_path, encoding='UTF-8')

        def count_stages(lines):
            # One more than the number of integer divisions by 8 needed to
            # bring ``lines`` down to 0.
            stages = 1
            while lines > 0:
                lines = lines // 8
                stages += 1
            return stages

        self.inbuf = buffer(SimConfig_path=SimConfig_path,
                            buf_level=1,
                            default_buf_size=default_buf_size)
        self.PE = ProcessElement(SimConfig_path)

        # Input buffer: write latency first, then read latency.
        self.inbuf.calculate_buf_write_latency(indata)
        self.PE_buf_wlatency = self.inbuf.buf_wlatency
        # unit: ns
        digital_frequency = float(
            config.get('Digital module', 'Digital_Frequency'))
        self.digital_period = 1 / digital_frequency * 1e3
        self.inbuf.calculate_buf_read_latency(rdata)
        self.PE_buf_rlatency = self.inbuf.buf_rlatency

        # Number of passes along each dimension and their product.
        bit_passes = math.ceil(inprecision / self.PE.DAC_precision)
        row_passes = math.ceil(read_row / self.PE.PE_group_DAC_num)
        col_passes = math.ceil(read_column / self.PE.PE_group_ADC_num)
        total_passes = bit_passes * row_passes * col_passes
        self.PE.calculate_xbar_read_latency()

        # Parsed but not used further in this method.
        _transistor_tech = int(
            config.get('Crossbar level', 'Transistor_Tech'))
        xbar_size = [
            float(field)
            for field in config.get('Crossbar level', 'Xbar_Size').split(',')
        ]
        dac_count = int(config.get('Process element level', 'DAC_Num'))
        adc_count = int(config.get('Process element level', 'ADC_Num'))
        xbar_rows = xbar_size[0]
        xbar_cols = xbar_size[1]

        # ns  (using NVSim); 1:8 decoder, technology 65nm
        decoder_1to8_ns = 0.27933
        decoder_stages = count_stages(math.ceil(xbar_rows / dac_count))
        self.decoderLatency = decoder_stages * decoder_1to8_ns

        # ns; 8:1 mux
        mux_8to1_ns = 32.744 / 1000
        mux_stages = count_stages(math.ceil(xbar_cols / adc_count))
        self.muxLatency = mux_stages * mux_8to1_ns

        # Analog path: crossbar, DAC and ADC each repeat once per pass.
        self.xbar_latency = total_passes * self.PE.xbar_read_latency
        self.PE.calculate_DAC_latency()
        self.DAC_latency = total_passes * self.PE.DAC_latency
        self.PE.calculate_ADC_latency()
        self.ADC_latency = total_passes * self.PE.ADC_latency

        # Digital path, expressed in digital clock periods.
        period = self.digital_period
        # write and read
        self.iReg_latency = (row_passes * col_passes * period +
                             total_passes * period)
        self.shiftreg_latency = total_passes * period
        self.input_demux_latency = total_passes * self.decoderLatency
        adder_depth = math.ceil(math.log2(self.PE.group_num))
        self.adder_latency = col_passes * adder_depth * period
        self.output_mux_latency = total_passes * self.muxLatency
        self.computing_latency = (self.DAC_latency + self.xbar_latency +
                                  self.ADC_latency)
        self.oreg_latency = col_passes * period

        self.PE_digital_latency = (self.iReg_latency +
                                   self.shiftreg_latency +
                                   self.input_demux_latency +
                                   self.adder_latency +
                                   self.output_mux_latency +
                                   self.oreg_latency)
        self.PE_latency = (self.PE_buf_wlatency + self.PE_buf_rlatency +
                           self.computing_latency + self.PE_digital_latency)