    def instantiate(self):
        self.name = 'tb'
        self.image_size = (4, 4)
        self.filter_size = (3, 3)
        self.in_chn = 4
        self.out_chn = 8
        self.chn_per_word = 4

        self.arr_x = self.out_chn
        self.arr_y = self.in_chn

        self.input_chn = Channel()
        self.output_chn = Channel()

        ifmap_glb_depth = self.image_size[0]*self.image_size[1]* \
                self.in_chn//self.chn_per_word
        print("ifmap glb depth:", ifmap_glb_depth)
        psum_glb_depth = self.image_size[0]*self.image_size[1]* \
                self.out_chn//self.chn_per_word
        print("psum glb depth:", psum_glb_depth)

        self.stimulus = Stimulus(self.arr_x, self.arr_y, self.chn_per_word,
                                 self.input_chn, self.output_chn)
        self.dut = WSArch(self.arr_x, self.arr_y, self.input_chn,
                          self.output_chn, self.chn_per_word, ifmap_glb_depth,
                          psum_glb_depth)

        self.configuration_done = False
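# Aside (a hypothetical standalone check, not part of the testbench): each GLB
# word holds chn_per_word values, so the depths above count SRAM words, not scalars.
image_size, in_chn, out_chn, chn_per_word = (4, 4), 4, 8, 4

ifmap_glb_depth = image_size[0] * image_size[1] * in_chn // chn_per_word
psum_glb_depth = image_size[0] * image_size[1] * out_chn // chn_per_word

assert ifmap_glb_depth == 16  # 4*4 pixels * 4 channels / 4 channels per word
assert psum_glb_depth == 32   # 4*4 pixels * 8 channels / 4 channels per word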
Example #3
    def instantiate(self):
        self.name = 'tb'
        self.image_size = (4, 4)
        self.filter_size = (3, 3)
        self.in_chn = 8
        self.out_chn = 16
        self.chn_per_word = 4

        self.arr_x = self.out_chn // 2
        self.arr_y = self.in_chn // 2

        self.input_chn = Channel()
        self.output_chn = Channel()
        self.psum_chn = Channel(128)
        self.curr_pass = 0
        self.tick_counter = 0

        ifmap_glb_depth = self.image_size[0]*self.image_size[1]* \
                (self.in_chn//2)//self.chn_per_word
        psum_glb_depth = self.image_size[0]*self.image_size[1]* \
                (self.out_chn//2)//self.chn_per_word

        self.stimulus = Stimulus(self.arr_x, self.arr_y, self.chn_per_word,
                                 self.input_chn, self.output_chn,
                                 self.psum_chn)
        self.dut = WSArch(self.arr_x, self.arr_y, self.input_chn,
                          self.output_chn, self.chn_per_word, ifmap_glb_depth,
                          psum_glb_depth)

        self.configuration_done = False
Example #4
    def instantiate(self, dram_wr_chn, noc_wr_chn, rd_chn, glb_depth,
                    chn_per_word):
        self.dram_wr_chn = dram_wr_chn
        self.noc_wr_chn = noc_wr_chn
        self.rd_chn = rd_chn
        self.chn_per_word = chn_per_word
        self.name = 'psum_glb'

        self.stat_type = 'show'
        self.raw_stats = {'size': (glb_depth, chn_per_word), 'rd': 0, 'wr': 0}

        self.sram = SRAM(glb_depth, chn_per_word, nports=2, name=self.name)
        self.last_read = Channel(3, name='last_read')

        self.filter_size = (0, 0)
        self.fmap_sets = 0
        self.fmap_per_iteration = 0

        self.rd_set = 0
        self.fmap_rd_idx = 0
        self.iteration = 0

        self.wr_set = 0
        self.fmap_wr_idx = 0
        self.wr_done = False
Example #5
    def instantiate(self):
        self.name = 'tb'
        self.image_size = (4, 4)
        self.filter_size = (3, 3)
        self.in_chn = 4
        self.out_chn = 8
        self.chn_per_word = 4
        self.num_tiles = 4

        self.arr_x = self.out_chn
        self.arr_y = self.in_chn

        self.input_chn = Channel()
        self.output_chn = Channel()

        self.finish_signal_chn = Channel()

        self.stat_type = 'show'
        self.raw_stats = {}

        ifmap_glb_depth = (self.image_size[0] * self.image_size[1] *
                           self.num_tiles * self.in_chn // self.chn_per_word)
        # psum_glb_depth = self.image_size[0]*self.image_size[1]*self.out_chn//self.chn_per_word
        print("ifmap glb depth:", ifmap_glb_depth)
        print("weight glb depth: 0")

        self.stimulus = Stimulus(self.arr_x, self.arr_y, self.chn_per_word,
                                 self.input_chn, self.output_chn,
                                 self.finish_signal_chn)
        self.dut = WSArch(self.arr_x, self.arr_y, self.input_chn,
                          self.output_chn, self.chn_per_word, ifmap_glb_depth)

        self.configuration_done = False
Example #6
    def instantiate(self, wr_chn, rd_chn, glb_depth, chn_per_word):
        self.wr_chn = wr_chn
        self.rd_chn = rd_chn
        self.chn_per_word = chn_per_word
        self.glb_depth = glb_depth
        self.name = 'ifmap_glb'

        self.stat_type = 'show'
        self.raw_stats = {
            'size': (glb_depth, chn_per_word),
            'ifmap_glb_rd': 0,
            'ifmap_glb_wr': 0
        }

        self.sram = SRAM(glb_depth, chn_per_word)
        self.last_read = Channel(3)

        self.image_size = (0, 0)
        self.filter_size = (0, 0)
        self.fmap_sets = 0
        self.fmap_per_iteration = 0

        self.curr_set = 0
        self.fmap_idx = 0
        self.iteration = 0
        self.wr_done = False
Example #7
    def instantiate(self, dram_wr_chn, noc_wr_chn, rd_chn, glb_depth,
                    block_size, num_nonzero):
        self.dram_wr_chn = dram_wr_chn
        self.noc_wr_chn = noc_wr_chn
        self.rd_chn = rd_chn
        self.name = 'psum_glb'
        self.block_size = block_size
        self.num_nonzero = num_nonzero

        self.stat_type = 'show'
        self.raw_stats = {'size': (glb_depth, block_size), 'rd': 0, 'wr': 0}

        self.sram = SRAM(glb_depth, block_size, nports=2, dtype=np.float16)
        self.last_read = Channel(3)

        self.filter_size = (0, 0)
        self.fmap_sets = 0
        self.fmap_per_iteration = 0

        self.rd_set = 0
        self.fmap_rd_idx = 0
        self.iteration = 0

        self.wr_set = 0
        self.fmap_wr_idx = 0
        self.wr_done = False
Example #8
    def instantiate(self, wr_chn, rd_chn, glb_depth, chn_per_word):
        self.wr_chn = wr_chn
        self.rd_chn = rd_chn
        self.chn_per_word = chn_per_word
        self.name = 'ifmap_glb'

        self.stat_type = 'show'
        self.raw_stats = {'size': (glb_depth, chn_per_word), 'rd': 0, 'wr': 0}

        self.sram = SRAM(glb_depth, chn_per_word, name=self.name)
        self.last_read = Channel(3, name='last_read')

        self.image_size = (0, 0)
        self.filter_size = (0, 0)
        self.fmap_sets = 0
        self.full_fmap_sets = 0
        self.fmap_per_iteration = 0

        self.curr_set = 0
        self.fmap_idx = 0
        self.iteration = 0
        self.tile_in = 0
        self.tile_out = 0
        self.wr_done = False
        self.task_done = True
Example #9
    def instantiate(self, arr_x, arr_y, chn_per_word, done_chn,
                    ifmap_glb_depth, psum_glb_depth, weight_glb_depth):
        self.name = 'conv_tb'

        self.image_size = None
        self.filter_size = None
        self.full_in_chn = None
        self.full_out_chn = None

        self.ceil_in_chn = None
        self.ceil_out_chn = None

        self.in_chn = arr_y
        self.out_chn = arr_x
        self.done_chn = done_chn

        self.chn_per_word = chn_per_word

        self.arr_x = self.out_chn
        self.arr_y = self.in_chn

        self.input_chn = Channel(name='arch_input_chn')
        self.output_chn = Channel(name='arch_output_chn')

        self.stimulus = Stimulus(self.arr_x, self.arr_y, self.chn_per_word,
                                 self.input_chn, self.output_chn,
                                 self.done_chn)
        self.dut = WSArch(self.arr_x, self.arr_y, self.input_chn,
                          self.output_chn, self.chn_per_word, ifmap_glb_depth,
                          psum_glb_depth, weight_glb_depth)
Example #10
class ConverterTB(Module):
    def instantiate(self):
        self.name = 'tb'

        self.input_size = 4
        self.block_size = 12
        self.in_sets = self.block_size // self.input_size
        self.num_nonzero = 5
        self.preserve_order = True

        self.in_chn = Channel()
        self.mid_chn = Channel()
        self.out_chn = Channel()

        self.converter = Converter(self.in_chn, self.mid_chn, self.input_size, self.block_size)
        #self.pruner = NaivePruner(self.mid_chn,self.out_chn,self.num_nonzero, self.block_size, self.preserve_order)
        self.pruner = ClusteredPruner(self.mid_chn,self.out_chn,self.num_nonzero, self.block_size, self.preserve_order)
        #self.pruner = ThresholdPruner(self.mid_chn,self.out_chn,self.num_nonzero, self.block_size, self.preserve_order)

        self.iterations = 10
        self.iteration = 0
        self.curr_set = 0
        self.out_counter = 0
        self.test_data = [[randint(1, 5) if randint(0, 3) > 1 else 0
                           for j in range(self.block_size)]
                          for i in range(self.iterations + 1)]
        # send in one extra iteration to flush out the last outputs
        print("Stimulus:")
        print("[")
        for i in range(len(self.test_data)-1):
            print(self.test_data[i])
        print("]")

    def tick(self):
        if (self.in_chn.vacancy() and self.iteration != self.iterations + 1):
            imin = self.curr_set*self.input_size
            imax = imin+self.input_size
            data = [self.test_data[self.iteration][i] for i in range(imin, imax)]
            self.in_chn.push(data)

            self.curr_set += 1
            if (self.curr_set == self.in_sets):
                self.curr_set = 0
                self.iteration += 1
        if (self.out_chn.valid()):
            data = self.out_chn.pop()
            print(data)
            #print("out_counter: ", self.out_counter)
            self.out_counter += 1
            if (self.out_counter == self.iterations):
                raise Finish("Check manually")
Example #11
    def instantiate(self, image_size, filter_size, in_chn, out_chn, block_size,
                    ifmap, weights, bias, pruner_name, num_nonzero):
        self.name = 'tb'

        # if (debug):
        #     self.image_size = (4, 4)
        #     self.filter_size = (3, 3)
        #     self.in_chn = 2
        #     self.out_chn = 4
        #     self.block_size = 2
        #     self.num_nonzero = 1  # number of non-zero values in each block; helps test the correctness of the arch
        # else:
        #     self.image_size = (16, 16)
        #     self.filter_size = (3, 3)
        #     self.in_chn = 16
        #     self.out_chn = 8
        #     self.block_size = 4
        #     self.num_nonzero = 4

        self.image_size = image_size
        self.filter_size = filter_size
        self.in_chn = in_chn
        self.out_chn = out_chn
        self.block_size = block_size
        self.num_nonzero = num_nonzero  # number of non-zero values in each block; helps test the correctness of the arch

        #the inputs to this specific layer
        self.ifmap = ifmap
        self.weights = weights
        self.bias = bias
        self.pruner_name = pruner_name

        self.arr_y = self.out_chn
        self.input_chn = Channel()
        self.output_chn = Channel()

        ifmap_glb_depth = (self.filter_size[1] + (self.filter_size[0]-1)*\
            self.image_size[1]) * self.in_chn // self.block_size
        psum_glb_depth = self.out_chn // self.block_size
        weight_glb_depth = self.filter_size[0]*self.filter_size[1]* \
                self.in_chn*self.out_chn//self.block_size

        self.stimulus = Stimulus(self.arr_y, self.block_size, self.num_nonzero,
                                 self.input_chn, self.output_chn,
                                 self.pruner_name)
        self.dut = OSArch(self.arr_y, self.input_chn, self.output_chn,
                          self.block_size, self.num_nonzero, ifmap_glb_depth,
                          psum_glb_depth, weight_glb_depth)

        self.configuration_done = False
Example #12
class ChannelTB(Module):
    def instantiate(self):
        self.channel = Channel(4)
        self.push_count = 0
        self.free_count = 0
        self.test_size = 100

    def tick(self):
        # Print current state of the channel
        c, n = [], 0
        while (self.channel.valid(n)):
            d = self.channel.peek(n)
            assert (d == (self.free_count + n))
            c.append(d)
            n += 1
        print("channel: %s" % c)

        # Possibly push a new element
        if random.random() < 0.5 and self.push_count < self.test_size and \
                self.channel.vacancy():
            self.channel.push(self.push_count)
            print("push: %d" % self.push_count)
            self.push_count += 1

        # Possibly free some elements
        if random.random() < 0.5 and self.free_count < self.test_size and \
                n != 0:
            num_free = random.randint(1, n)
            self.channel.free(num_free)
            self.free_count += num_free
            print("free: %d" % num_free)
Example #14
    def instantiate(self, wr_chn, rd_chn, glb_depth, block_size):
        self.wr_chn = wr_chn
        self.rd_chn = rd_chn
        self.name = 'weight_glb'

        self.filter_size = (0, 0)
        self.image_size = (0, 0)
        self.wr_done = False
        self.iteration = 0
        self.addr = 0
        self.in_chn = 0
        self.out_chn = 0
        #self.arr_y = 0
        #self.out_sets = 0
        self.block_size = block_size

        self.sram = SRAM(glb_depth, block_size)
        self.last_read = Channel(3)

        self.stat_type = 'show'
        self.raw_stats = {'size': (glb_depth, block_size), 'rd': 0, 'wr': 0}
Example #15
    def instantiate(self, wr_chn, rd_chn, arr_y, glb_depth, block_size,
                    num_nonzero):
        self.wr_chn = wr_chn
        self.rd_chn = rd_chn
        self.arr_y = arr_y
        self.block_size = block_size
        self.num_nonzero = num_nonzero
        self.name = 'ifmap_glb'

        self.stat_type = 'show'
        self.raw_stats = {'size': (glb_depth, num_nonzero), 'rd': 0, 'wr': 0}

        self.sram = SRAM(glb_depth, num_nonzero * 3)
        self.last_read = Channel(3)
        self.glb_depth = glb_depth

        self.image_size = (0, 0)
        self.filter_size = (0, 0)
        self.fmap_sets = 0
        self.fmap_per_iteration = 0

        self.curr_set = 0
        self.fmap_idx = 0
        self.iteration = 0
        self.wr_done = False

        # For managing convolution
        self.curr_x = 0
        self.curr_y = 0
        self.curr_chn = 0
        self.request_idx = 0
        self.send_idx = 0
        #self.curr_filt_x = 0
        #self.curr_filt_y = 0
        self.ifmap_done = False

        self.needed_addr = 0
        self.ready_to_output = False  # ready to output a filter_size block of inputs
        self.curr_data = [0 for i in range(3 * num_nonzero)]
        self.data_idx = num_nonzero  # block other operations while actively working through data
Example #16
    def instantiate(self, arr_x, arr_y, chn_per_word, done_chn, ifmap_glb_depth, psum_glb_depth, weight_glb_depth):
        self.name = 'fc_tb'
        self.arr_x = arr_x
        self.arr_y = arr_y
        self.chn_per_word = chn_per_word

        self.batch_size = None
        self.input_size = None
        self.output_size = None

        self.ceil_batch = None
        self.ceil_output = None

        self.input_chn = Channel(name='arch_input_chn')
        self.output_chn = Channel(name='arch_output_chn')
        self.done_chn = done_chn

        self.stimulus = Stimulus(self.arr_x, self.arr_y, self.chn_per_word,
            self.input_chn, self.output_chn, self.done_chn)
        self.dut = OSArch(self.arr_x, self.arr_y, self.input_chn,
                self.output_chn, self.chn_per_word, ifmap_glb_depth,
                weight_glb_depth)
Example #17
class MetaArchTB(Module):
    def instantiate(self, arr_x, arr_y, chn_per_word, layers, batch_size):
        self.arr_x = arr_x
        self.arr_y = arr_y
        self.chn_per_word = chn_per_word
        self.layers = layers
        self.batch_size = batch_size

        self.name = 'meta'

        self.started = False
        self.done_chn = Channel()

        self.ifmap_glb_depth = 0
        self.psum_glb_depth = 0
        self.weights_glb_depth = 0

        use_conv = False
        use_fc = False

        self.conv_tb = None
        self.fc_tb = None

        cur_image_size = None
        cur_in_chn = None
        is_conv = False

        num_convs = 0
        num_fc = 0

        for layer in self.layers:
            if isinstance(layer, Conv):
                if cur_image_size is None:
                    pass
                elif cur_image_size != layer.image_size or cur_in_chn != layer.in_chn:
                    raise Exception('Invalid conv image size for %s: %s %s' %
                                    (layer.name, (cur_image_size, cur_in_chn),
                                     (layer.image_size, layer.in_chn)))
                ifmap_glb_depth, psum_glb_depth, weights_glb_depth = WSArchTB.required_glb_depth(
                    self.arr_x, self.arr_y, self.chn_per_word,
                    layer.image_size, layer.filter_size, layer.in_chn,
                    layer.out_chn)
                use_conv = True
                output_shape = layer.new_shape((self.batch_size, ) +
                                               layer.image_size +
                                               (layer.out_chn, ))
                cur_image_size = output_shape[1:3]
                cur_in_chn = output_shape[3]
                is_conv = True
                num_convs += 1
            elif isinstance(layer, FC):
                if cur_image_size is None:
                    pass
                elif not is_conv and cur_image_size != layer.input_size:
                    raise Exception('Invalid fc dimension transition for ' +
                                    layer.name)
                elif (is_conv and cur_image_size[0] * cur_image_size[1] *
                        cur_in_chn != layer.input_size):
                    raise Exception(
                        'Invalid conv to fc dimension transition to ' +
                        layer.name)
                ifmap_glb_depth, psum_glb_depth, weights_glb_depth = OSArchTB.required_glb_depth(
                    self.arr_x, self.arr_y, self.chn_per_word, self.batch_size,
                    layer.input_size, layer.output_size)
                use_fc = True
                _, cur_image_size = layer.new_shape(
                    (self.batch_size, layer.output_size))
                is_conv = False
                num_fc += 1
            else:
                raise Exception('layer not valid')
            self.ifmap_glb_depth = max(self.ifmap_glb_depth, ifmap_glb_depth)
            self.psum_glb_depth = max(self.psum_glb_depth, psum_glb_depth)
            self.weights_glb_depth = max(self.weights_glb_depth,
                                         weights_glb_depth)

        if use_conv:
            self.conv_tb = WSArchTB(self.arr_x, self.arr_y, self.chn_per_word,
                                    self.done_chn, self.ifmap_glb_depth,
                                    self.psum_glb_depth,
                                    self.weights_glb_depth)
        if use_fc:
            self.fc_tb = OSArchTB(self.arr_x, self.arr_y, self.chn_per_word,
                                  self.done_chn, self.ifmap_glb_depth,
                                  self.psum_glb_depth, self.weights_glb_depth)

        self.layer_step = 0
        self.batch_step = 0
        self.conv_inputs = [None] * self.batch_size
        self.fc_input = None

        self.conv_weights = [None] * num_convs
        self.conv_bias = [None] * num_convs

        self.fc_weights = [None] * num_fc
        self.fc_bias = [None] * num_fc

        self.cur_conv = 0
        self.cur_fc = 0

    def tick(self):
        if not self.started or self.done_chn.valid():
            self.started = True
            old_layer = self.layers[self.layer_step]

            if self.done_chn.valid():
                valid = self.done_chn.pop()
                if not valid:
                    raise Finish('Validation Failed')
                if isinstance(old_layer, Conv):
                    self.conv_inputs[self.batch_step] = self.conv_tb.get_output()
                    self.batch_step += 1
                    if self.batch_step == self.batch_size:
                        self.conv_inputs = [
                            batch for batch in old_layer.activation(
                                np.array(self.conv_inputs))
                        ]
                        self.batch_step = 0
                        self.layer_step += 1
                        self.cur_conv += 1
                else:
                    self.fc_input = self.fc_tb.get_output()
                    self.fc_input = old_layer.activation(self.fc_input)
                    self.layer_step += 1
                    self.cur_fc += 1
                if self.layer_step == len(self.layers):
                    raise Finish('Success')

            layer = self.layers[self.layer_step]

            # handle conv to fc transition
            if (isinstance(layer, FC) and self.fc_input is None
                    and self.conv_inputs[0] is not None):
                if self.name is not None:
                    self.output_file.write("FC MODE\n")
                self.fc_input = np.zeros(
                    (self.batch_size, layer.input_size)).astype(np.int64)
                for i in range(self.batch_size):
                    self.fc_input[i] = self.conv_inputs[i].reshape(
                        layer.input_size)

            if isinstance(layer, Conv):
                if self.name is not None:
                    self.output_file.write("CONV MODE\n")
                if self.conv_inputs[self.batch_step] is None:
                    _, weights, bias = self.conv_tb.configure(
                        layer.image_size, layer.filter_size, layer.in_chn,
                        layer.out_chn)
                    self.conv_weights[self.cur_conv] = weights
                    self.conv_bias[self.cur_conv] = bias
                elif (self.conv_weights[self.cur_conv] is None
                        or self.conv_bias[self.cur_conv] is None):
                    weights, bias = self.conv_tb.configure_fixed_image(
                        self.conv_inputs[self.batch_step], layer.filter_size,
                        layer.in_chn, layer.out_chn)
                    self.conv_weights[self.cur_conv] = weights
                    self.conv_bias[self.cur_conv] = bias
                else:
                    self.conv_tb.configure_fixed(
                        self.conv_inputs[self.batch_step],
                        self.conv_weights[self.cur_conv],
                        self.conv_bias[self.cur_conv])

            elif isinstance(layer, FC):
                if self.fc_input is None:
                    _, weights, bias = self.fc_tb.configure(
                        self.batch_size, layer.input_size, layer.output_size)
                    self.fc_weights[self.cur_fc] = weights
                    self.fc_bias[self.cur_fc] = bias
                elif (self.fc_weights[self.cur_fc] is None
                        or self.fc_bias[self.cur_fc] is None):
                    weights, bias = self.fc_tb.configure_fixed_image(
                        self.fc_input, layer.output_size)
                    self.fc_weights[self.cur_fc] = weights
                    self.fc_bias[self.cur_fc] = bias
                else:
                    self.fc_tb.configure_fixed(self.fc_input,
                                               self.fc_weights[self.cur_fc],
                                               self.fc_bias[self.cur_fc])
            else:
                raise Exception('layer not valid')
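# Aside (hypothetical check): the conv-to-fc transition above only works when a
# conv output of shape (H, W, C) flattens to exactly layer.input_size, i.e.
# H * W * C == input_size; the reshape in tick() performs that flattening.
import numpy as np

conv_out = np.arange(4 * 4 * 8).reshape(4, 4, 8)  # e.g. H = W = 4, C = 8
fc_row = conv_out.reshape(4 * 4 * 8)              # one batch row of length 128
assert fc_row.shape == (128,)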
Example #18
class InputSerializer(Module):
    def instantiate(self, arch_input_chn, arr_y, block_size, num_nonzero,
                    pruner_name):
        # PE static configuration (immutable)
        #self.arr_x = arr_x
        self.arr_y = arr_y
        #self.chn_per_word = chn_per_word
        self.block_size = block_size
        self.num_nonzero = num_nonzero

        self.convert_chn = Channel()
        self.prune_chn = Channel()
        self.arch_input_chn = arch_input_chn

        # Although both the InputSerializer and the pruner push to arch_input_chn,
        # there is no conflict: all weights are pushed by the InputSerializer first,
        # then all inputs by the pruner.
        self.converter = Converter(self.convert_chn, self.prune_chn, \
            self.block_size, self.block_size)
        # self.pruner = NaivePruner(self.prune_chn,self.arch_input_chn, \
        #     self.num_nonzero,True)

        # user-defined pruner for this layer; defaults to the naive pruner
        self.pruner = getattr(pruner, pruner_name)(self.prune_chn,self.arch_input_chn, \
            self.num_nonzero, self.block_size, True)

        self.ifmap = None
        self.weights = None
        self.bias = None

        self.image_size = (0, 0)
        self.filter_size = (0, 0)

        self.ifmap_psum_done = True
        self.pass_done = Reg(False)

        # State Counters
        self.curr_set = 0
        self.curr_filter = 0
        self.iteration = 0
        self.fmap_idx = 0
        self.curr_chn = 0
        self.curr_x = 0  # run through first two dimensions of input
        self.curr_y = 0
        self.bias_set = 0
        #self.send_bias = False

    def configure(self, ifmap, weights, bias, in_chn, out_chn, image_size,
                  filter_size):
        self.ifmap = ifmap
        self.weights = weights
        self.bias = bias

        self.in_chn = in_chn
        self.out_chn = out_chn

        self.image_size = image_size
        self.filter_size = filter_size

        self.ifmap_psum_done = False
        self.weights_done = False
        self.pass_done.wr(False)

        # State Counters
        self.curr_set = 0
        self.curr_filter = 0
        self.iteration = 0
        self.curr_chn = 0
        self.curr_x = 0  # run through first two dimensions of input
        self.curr_y = 0
        self.bias_set = 0
        #self.send_bias = False

    def tick(self):
        if self.pass_done.rd():
            return

        if self.ifmap_psum_done:
            if self.convert_chn.vacancy():
                data = np.zeros(self.block_size)
                self.convert_chn.push(data)
            return

        in_sets = self.in_chn // self.block_size
        out_sets = self.out_chn // self.block_size
        num_iteration = self.filter_size[0] * self.filter_size[1]

        # read and hold all weights at the beginning for ease of implementation
        if not self.weights_done:
            f_x = self.iteration // self.filter_size[0]
            f_y = self.iteration % self.filter_size[0]

            # Push filters to PE columns. (PE is responsible for pop)
            if (self.arch_input_chn.vacancy()
                    and self.iteration < num_iteration):
                cmin = self.curr_filter * self.block_size
                cmax = cmin + self.block_size
                data = np.array([self.weights[f_x, f_y, self.curr_chn, c] \
                        for c in range(cmin, cmax) ])
                #print("{},{},{},{}-{}".format(f_x,f_y,self.curr_chn,cmin,cmax))
                #print(data)
                self.arch_input_chn.push(data)  # Gives groups of four along num_filters axis

                self.curr_filter += 1
                if self.curr_filter == out_sets:  # Loop through blocks of filters
                    self.curr_filter = 0
                    self.curr_chn += 1
                if self.curr_chn == self.in_chn:  # Loop through channels
                    self.curr_chn = 0
                    self.iteration += 1
                if self.iteration == num_iteration:  # Loop through 2D filter support
                    self.iteration = 0
                    #print("Weights done")
                    self.weights_done = True

        elif self.arch_input_chn.vacancy() and self.bias_set < out_sets:
            cmin = self.bias_set * self.block_size
            cmax = cmin + self.block_size
            data = np.array([self.bias[c] for c in range(cmin, cmax)])
            #print("bias (input serializer):")
            #print(data)
            self.arch_input_chn.push(data)
            self.bias_set += 1
        elif not self.ifmap_psum_done:
            if self.convert_chn.vacancy():
                cmin = self.curr_set * self.block_size
                cmax = cmin + self.block_size

                #xmin = x
                #xmax = x+self.arr_x
                # Write ifmap to glb
                #data = np.array([ self.ifmap[x, self.curr_y, self.curr_chn] for x in range(xmin, xmax) ])
                data = np.array([
                    self.ifmap[self.curr_x, self.curr_y, c]
                    for c in range(cmin, cmax)
                ])
                #print("{},{},{}-{}".format(self.curr_x, self.curr_y, cmin, cmax))
                #print(data)

                self.curr_set += 1
                if (self.curr_set == in_sets):
                    self.curr_set = 0
                    self.curr_y += 1
                if (self.curr_y == self.image_size[1]):
                    self.curr_y = 0
                    self.curr_x += 1

                self.convert_chn.push(data)

                if (self.curr_x == self.image_size[0]):
                    self.curr_x = 0
                    self.ifmap_psum_done = True
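# Summary of the push order that the tick() above produces (a sketch of the
# control flow, not additional behavior):
#   1. weights: for each (f_x, f_y) filter tap and each input channel,
#      out_chn // block_size blocks pushed straight to arch_input_chn;
#   2. biases: out_chn // block_size blocks, also straight to arch_input_chn;
#   3. ifmaps: block_size-wide slices routed through convert_chn into the
#      Converter and pruner, which then push to arch_input_chn, so the
#      weights-before-inputs ordering noted in the comment above is preserved.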
Example #19
class IFMapGLB(Module):
    def instantiate(self, wr_chn, rd_chn, glb_depth, chn_per_word):
        self.wr_chn = wr_chn
        self.rd_chn = rd_chn
        self.chn_per_word = chn_per_word
        self.name = 'ifmap_glb'

        self.stat_type = 'show'
        self.raw_stats = {'size': (glb_depth, chn_per_word), 'rd': 0, 'wr': 0}

        self.sram = SRAM(glb_depth, chn_per_word, name=self.name)
        self.last_read = Channel(3, name='last_read')

        self.image_size = (0, 0)
        self.filter_size = (0, 0)
        self.fmap_sets = 0
        self.fmap_per_iteration = 0

        self.curr_set = 0
        self.fmap_idx = 0
        self.iteration = 0
        self.wr_done = False

    def configure(self, image_size, filter_size, fmap_sets,
                  fmap_per_iteration):
        self.wr_done = False

        self.image_size = image_size
        self.filter_size = filter_size
        self.fmap_sets = fmap_sets
        self.fmap_per_iteration = fmap_per_iteration

    def tick(self):
        # self.iteration is which weight we are currently using
        # It's weight stationary so we fully use a set of filter weights
        # before continuing on.
        # (first weight in each filter, second weight in each filter, etc...)
        num_iteration = self.filter_size[0] * self.filter_size[1]
        offset_x = (self.filter_size[0] - 1) // 2
        offset_y = (self.filter_size[1] - 1) // 2
        filter_x = self.iteration % self.filter_size[0] - offset_x
        filter_y = self.iteration // self.filter_size[0] - offset_y

        # This is the first tick since initializing
        # INITIALIZATION CODE
        # Write all ifmaps and psums? to sram
        if not self.wr_done:
            # Write to GLB
            if self.wr_chn.valid():
                data = self.wr_chn.pop()
                # print "ifmap_glb wr"
                self.raw_stats['wr'] += 1
                # Write ifmap to glb
                # print "ifmap_to_glb: ", in_sets, self.fmap_idx, self.curr_set
                addr = self.fmap_sets * self.fmap_idx + self.curr_set
                self.curr_set += 1
                self.sram.request(WR, addr, data)
                if self.curr_set == self.fmap_sets:
                    self.curr_set = 0
                    self.fmap_idx += 1
                if self.fmap_idx == self.fmap_per_iteration:
                    # Done initializing ifmaps and psums
                    # self.sram.dump()
                    self.fmap_idx = 0
                    self.wr_done = True
        else:
            # Read from GLB and deal with SRAM latency
            if self.rd_chn.vacancy(1) and self.iteration < num_iteration:
                fmap_x = self.fmap_idx % self.image_size[0]
                fmap_y = self.fmap_idx // self.image_size[0]
                ifmap_x, ifmap_y = (fmap_x + filter_x, fmap_y + filter_y)
                if (ifmap_x < 0) or (ifmap_x >= self.image_size[0]) or \
                        (ifmap_y < 0) or (ifmap_y >= self.image_size[1]):
                    # print "ifmap req zero", self.iteration, self.fmap_idx
                    self.last_read.push(True)
                else:
                    fmap_idx = (ifmap_y * self.image_size[0]) + ifmap_x
                    addr = self.fmap_sets * fmap_idx + self.curr_set
                    # print "ifmap req glb", self.iteration, self.fmap_idx
                    self.sram.request(RD, addr)
                    self.last_read.push(False)
                self.curr_set += 1

                if self.curr_set == self.fmap_sets:
                    self.curr_set = 0
                    self.fmap_idx += 1
                if self.fmap_idx == self.fmap_per_iteration:
                    self.fmap_idx = 0
                    self.iteration += 1

            # Process the last read sent to the GLB SRAM
            if self.last_read.valid():
                is_zero = self.last_read.pop()
                # self.raw_stats['rd'] += 1
                data = [0]*self.chn_per_word if is_zero else \
                        [e for e in self.sram.response()]
                # print "ifmap rd glb", data
                self.rd_chn.push(data)
                self.raw_stats['rd'] += 1
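# Aside (hypothetical standalone check): the offset arithmetic at the top of
# tick() maps the iteration counter to a signed filter-tap offset centered on
# each output pixel; out-of-range taps take the last_read.push(True) path and
# are served as zeros instead of issuing an SRAM read.
def tap_offsets(filter_size, iteration):
    offset_x = (filter_size[0] - 1) // 2
    offset_y = (filter_size[1] - 1) // 2
    return (iteration % filter_size[0] - offset_x,
            iteration // filter_size[0] - offset_y)

assert tap_offsets((3, 3), 0) == (-1, -1)  # first tap: top-left of the window
assert tap_offsets((3, 3), 4) == (0, 0)    # center tap
assert tap_offsets((3, 3), 8) == (1, 1)    # last tap: bottom-right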
Example #20
class WeightsGLB(Module):
    def instantiate(self, wr_chn, rd_chn, glb_depth, chn_per_word):
        self.wr_chn = wr_chn
        self.rd_chn = rd_chn
        self.chn_per_word = chn_per_word
        self.name = 'weight_glb'

        self.stat_type = 'show'
        self.raw_stats = {'size' : (glb_depth, chn_per_word), 'rd': 0, 'wr': 0}

        self.sram = SRAM(glb_depth, chn_per_word, name=self.name)
        self.last_read = Channel(3, name='last_read')

        self.filter_size = (0, 0)
        self.in_sets = 0
        self.out_sets = 0

        self.curr_set = 0
        self.fmap_idx = 0
        self.iteration = 0
        self.tile = 0
        self.wr_done = False

    def configure(self, filter_size, in_sets, out_sets):
        self.wr_done = False

        self.filter_size = filter_size
        self.in_sets = in_sets
        self.out_sets = out_sets
        self.tile = 0
        self.stuff = []

    def tick(self):
        num_iteration = self.filter_size[0]*self.filter_size[1]

        if not self.wr_done:
            # Write to GLB
            if self.wr_chn.valid():
                data = self.wr_chn.pop()
                # print "ifmap_glb wr"
                # Write ifmap to glb
                # print "ifmap_to_glb: ", in_sets, self.fmap_idx, self.curr_set
                addr = self.in_sets*(self.out_sets*self.iteration+self.fmap_idx) + self.curr_set
                self.stuff.append(data)
                self.curr_set += 1
                self.sram.request(WR, addr, data)
                self.raw_stats['wr'] += len(data)
                if self.curr_set == self.in_sets:
                    self.curr_set = 0
                    self.fmap_idx += 1
                if self.fmap_idx == self.out_sets:
                    # Done initializing ifmaps and psums
                    # self.sram.dump()
                    self.fmap_idx = 0
                    self.iteration += 1
                    if self.iteration == num_iteration:
                        self.iteration = 0
                        self.wr_done = True
        else:
            did_read = False
            # Read from GLB and deal with SRAM latency
            if self.rd_chn.vacancy(1) and self.iteration < num_iteration:
                addr = self.in_sets*(self.out_sets*self.iteration+self.fmap_idx) + self.curr_set
                # print "ifmap req glb", self.iteration, self.fmap_idx
                self.sram.request(RD, addr)
                self.raw_stats['rd'] += self.chn_per_word
                self.last_read.push(False)
                did_read = True
                self.curr_set += 1

                if self.curr_set == self.in_sets:
                    self.curr_set = 0
                    self.fmap_idx += 1
                if self.fmap_idx == self.out_sets:
                    self.fmap_idx = 0
                    self.iteration += 1

            # Process the last read sent to the GLB SRAM
            if self.last_read.valid():
                is_zero = self.last_read.pop()
                data = [0]*self.chn_per_word if is_zero else \
                        [e for e in self.sram.response()]
                # print "ifmap rd glb", data
                self.rd_chn.push(data)
            elif not did_read:
                if self.iteration == num_iteration:
                    self.iteration = 0
                    self.wr_done = False
Example #21
    def instantiate(self, arr_x, arr_y, input_chn, output_chn, chn_per_word,
                    ifmap_glb_depth, psum_glb_depth):
        # PE static configuration (immutable)
        self.name = 'chip'
        self.arr_x = arr_x
        self.arr_y = arr_y
        self.chn_per_word = chn_per_word

        self.stat_type = 'show'

        # Instantiate DRAM IO channels
        self.input_chn = input_chn
        self.output_chn = output_chn

        # Instantiate input deserializer and output serializer
        self.ifmap_wr_chn = Channel(name="ifmap_wr_chn")
        self.psum_wr_chn = Channel(name="psum_wr_chn")
        self.weights_wr_chn = Channel(name="weights_wr_chn")
        self.deserializer = InputDeserializer(self.input_chn,
                                              self.ifmap_wr_chn,
                                              self.weights_wr_chn,
                                              self.psum_wr_chn, arr_x, arr_y,
                                              chn_per_word)

        self.psum_output_chn = Channel(name="psum_output_chn")
        self.serializer = OutputSerializer(self.output_chn,
                                           self.psum_output_chn)

        # Instantiate GLB and GLB channels
        self.ifmap_rd_chn = Channel(3, name='ifmap_rd_chn')
        self.ifmap_glb = IFMapGLB(self.ifmap_wr_chn, self.ifmap_rd_chn,
                                  ifmap_glb_depth, chn_per_word)

        self.psum_rd_chn = Channel(3, name='psum_rd_chn')
        self.psum_noc_wr_chn = Channel(name='psum_noc_wr_chn')
        self.psum_glb = PSumGLB(self.psum_wr_chn, self.psum_noc_wr_chn,
                                self.psum_rd_chn, psum_glb_depth, chn_per_word)

        self.weights_rd_chn = Channel(name='weights_rd_chn')
        self.weights_glb = WeightsGLB(self.weights_wr_chn, self.weights_rd_chn)

        # PE Array and local channel declaration
        self.pe_array = ModuleList()
        self.pe_ifmap_chns = ModuleList()
        self.pe_filter_chns = ModuleList()
        self.pe_psum_chns = ModuleList()
        self.pe_psum_chns.append(ModuleList())
        for x in range(self.arr_x):
            self.pe_psum_chns[0].append(
                Channel(32, name='pe_psum_chns_{}_{}'.format(x, 0)))

        # Actual array instantiation
        for y in range(self.arr_y):
            self.pe_array.append(ModuleList())
            self.pe_ifmap_chns.append(ModuleList())
            self.pe_filter_chns.append(ModuleList())
            self.pe_psum_chns.append(ModuleList())
            for x in range(self.arr_x):
                self.pe_ifmap_chns[y].append(
                    Channel(32, name='pe_ifmap_chns_{}_{}'.format(x, y)))
                self.pe_filter_chns[y].append(
                    Channel(32, name='pe_filter_chns_{}_{}'.format(x, y)))
                self.pe_psum_chns[y + 1].append(
                    Channel(32, name='pe_psum_chns_{}_{}'.format(x, y)))
                self.pe_array[y].append(
                    PE(x, y, self.pe_ifmap_chns[y][x],
                       self.pe_filter_chns[y][x], self.pe_psum_chns[y][x],
                       self.pe_psum_chns[y + 1][x]))

        # Setup NoC to deliver weights, ifmaps and psums
        self.filter_noc = WeightsNoC(self.weights_rd_chn, self.pe_filter_chns,
                                     self.chn_per_word)
        self.ifmap_noc = IFMapNoC(self.ifmap_rd_chn, self.pe_ifmap_chns,
                                  self.arr_x, self.chn_per_word)
        self.psum_rd_noc = PSumRdNoC(self.psum_rd_chn, self.pe_psum_chns[0],
                                     self.chn_per_word)
        self.psum_wr_noc = PSumWrNoC(self.pe_psum_chns[-1],
                                     self.psum_noc_wr_chn,
                                     self.psum_output_chn, self.chn_per_word)
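# Note on the wiring above (restating the code, not adding behavior):
# pe_psum_chns has arr_y + 1 rows. PSumRdNoC feeds row 0, the PE at (x, y)
# pops its psum input from row y and pushes its partial sum to row y + 1, and
# PSumWrNoC drains the final row, forming the weight-stationary accumulation
# chain down each PE column.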
Example #22
class IFMapGLB(Module):
    def instantiate(self, wr_chn, rd_chn, glb_depth, chn_per_word):
        self.wr_chn = wr_chn
        self.rd_chn = rd_chn
        self.chn_per_word = chn_per_word
        self.name = 'ifmap_glb'

        self.stat_type = 'show'
        self.raw_stats = {'size': (glb_depth, chn_per_word),
                          'ifmap_glb_rd': 0, 'ifmap_glb_wr': 0}

        self.sram = SRAM(glb_depth, chn_per_word)
        self.last_read = Channel(3)

        self.image_size = (0, 0)
        self.filter_size = (0, 0)
        self.fmap_sets = 0
        self.fmap_per_iteration = 0

        self.curr_set = 0
        self.fmap_idx = 0
        self.iteration = 0
        self.wr_done = False

    def configure(self, image_size, filter_size, fmap_sets, fmap_per_iteration):
        self.wr_done = False

        self.image_size = image_size
        self.filter_size = filter_size
        self.fmap_sets = fmap_sets
        self.fmap_per_iteration = fmap_per_iteration

        self.read_ctr = 0

    def tick(self):
        num_iteration = self.filter_size[0]*self.filter_size[1]
        offset_x = (self.filter_size[0] - 1)//2
        offset_y = (self.filter_size[1] - 1)//2
        filter_x = self.iteration % self.filter_size[0] - offset_x
        filter_y = self.iteration // self.filter_size[0] - offset_y

        if not self.wr_done:
            # Write to GLB
            if self.wr_chn.valid():
                data = self.wr_chn.pop()
                self.raw_stats['ifmap_glb_wr'] += len(data)
                # print "ifmap_glb wr"
                # Write ifmap to glb
                addr = self.fmap_sets*self.fmap_idx + self.curr_set
                # print("ifmap_to_glb: fmap idx, curr set, addr ",  self.fmap_idx, self.curr_set, addr)
                self.curr_set += 1
                self.sram.request(WR, addr, data)
                if self.curr_set == self.fmap_sets:
                    self.curr_set = 0
                    self.fmap_idx += 1
                if self.fmap_idx == self.fmap_per_iteration:
                    # Done initializing ifmaps and psums
                    # self.sram.dump()
                    self.fmap_idx = 0
                    self.wr_done = True
        else:
            # Read from GLB and deal with SRAM latency
            if self.rd_chn.vacancy(1) and self.iteration < num_iteration:

                self.read_ctr += 1
                #print("ifmap glb read ctr ", self.read_ctr)

                fmap_x = self.fmap_idx % self.image_size[0]
                fmap_y = self.fmap_idx  // self.image_size[0]
                ifmap_x, ifmap_y = (fmap_x + filter_x, fmap_y + filter_y)
                if (ifmap_x < 0) or (ifmap_x >= self.image_size[0]) or \
                        (ifmap_y < 0) or (ifmap_y >= self.image_size[1]):
                    # print("ifmap req zero: iter, fmap idx ", self.iteration, self.fmap_idx)
                    self.last_read.push(True)
                else:
                    fmap_idx = (ifmap_y*self.image_size[0]) + ifmap_x
                    addr = self.fmap_sets*fmap_idx + self.curr_set
                    # print("addr fmap idx, addr: ", fmap_idx, addr)
                    #print("ifmap req glb: iter, fmap idx, addr ", self.iteration, self.fmap_idx, addr)
                    self.sram.request(RD, addr)
                    self.last_read.push(False)
                self.curr_set += 1
                if self.curr_set == self.fmap_sets:
                    self.curr_set = 0
                    self.fmap_idx += 1
                if self.fmap_idx == self.fmap_per_iteration:
                    # print("fmap idx, fmap per iter: ", self.fmap_idx, self.fmap_per_iteration)
                    self.fmap_idx = 0
                    self.iteration += 1

            # Process the last read sent to the GLB SRAM
            if self.last_read.valid():
                is_zero = self.last_read.pop()
                data = [0]*self.chn_per_word if is_zero else \
                        [e for e in self.sram.response()]
                #print("ifmap rd glb", data, self.iteration)
                self.rd_chn.push(data)
                self.raw_stats['ifmap_glb_rd'] += len(data)
Example #24
    def instantiate(self, arr_y,
            input_chn, output_chn,
            block_size, num_nonzero,
            ifmap_glb_depth, psum_glb_depth, weight_glb_depth):
        # PE static configuration (immutable)
        self.name = 'chip'
        #self.arr_x = arr_x
        self.arr_y = arr_y
        self.block_size = block_size
        self.num_nonzero = num_nonzero
        
        self.stat_type = 'show'

        # Instantiate DRAM IO channels
        self.input_chn = input_chn
        self.output_chn = output_chn

        # Instantiate input deserializer and output serializer
        self.ifmap_wr_chn = Channel()
        self.psum_wr_chn = Channel()
        self.weights_wr_chn = Channel()
        self.deserializer = InputDeserializer(self.input_chn, self.ifmap_wr_chn,
                self.weights_wr_chn, self.psum_wr_chn, arr_y,
                block_size, num_nonzero)

        self.psum_output_chn = Channel()
        self.serializer = OutputSerializer(self.output_chn, self.psum_output_chn)

        # Instantiate GLB and GLB channels
        self.ifmap_rd_chn = Channel(3)
        #self.ifmap_glb = IFMapGLB(self.ifmap_wr_chn, self.ifmap_rd_chn, arr_y,
        #        ifmap_glb_depth, block_size, num_nonzero)

        self.psum_rd_chn = Channel(3)
        self.psum_noc_wr_chn = Channel()
        self.psum_glb = PSumGLB(self.psum_wr_chn, self.psum_noc_wr_chn, self.psum_rd_chn,
                psum_glb_depth, block_size, num_nonzero)

        self.weights_rd_chn = Channel()
        #self.weights_glb = WeightsGLB(self.weights_wr_chn, self.weights_rd_chn, weight_glb_depth, block_size)

        self.ifmap_weights_glb = IFMapWeightsGLB(self.ifmap_wr_chn, self.ifmap_rd_chn,\
            self.weights_wr_chn, self.weights_rd_chn, arr_y, ifmap_glb_depth,\
            weight_glb_depth, block_size, num_nonzero)
        # PE Array and local channel declaration
        self.pe_array = ModuleList()
        self.pe_ifmap_chns = ModuleList()
        self.pe_filter_chns = ModuleList()
        self.pe_psum_in_chns = ModuleList()
        self.pe_psum_out_chns = ModuleList()

        # Actual array instantiation
        for y in range(self.arr_y):
            self.pe_array.append(ModuleList())
            self.pe_ifmap_chns.append(ModuleList())
            self.pe_filter_chns.append(ModuleList())
            self.pe_psum_in_chns.append(ModuleList())
            self.pe_psum_out_chns.append(ModuleList())
            for x in range(1):
                self.pe_ifmap_chns[y].append(Channel(32))
                self.pe_filter_chns[y].append(Channel(32))
                self.pe_psum_in_chns[y].append(Channel(32))
                self.pe_psum_out_chns[y].append(Channel(32))
                self.pe_array[y].append(
                    PE(x, y,
                        self.pe_ifmap_chns[y][x],
                        self.pe_filter_chns[y][x],
                        self.pe_psum_in_chns[y][x],
                        self.pe_psum_out_chns[y][x]
                    )
                )

        # Setup NoC to deliver weights, ifmaps and psums
        self.filter_noc = WeightsNoC(self.weights_rd_chn, self.pe_filter_chns, block_size)
        self.ifmap_noc = IFMapNoC(self.ifmap_rd_chn, self.pe_ifmap_chns)
        self.psum_rd_noc = PSumRdNoC(self.psum_rd_chn, self.pe_psum_in_chns, self.arr_y, block_size)
        self.psum_wr_noc = PSumWrNoC(self.pe_psum_out_chns, self.psum_noc_wr_chn, self.psum_output_chn, self.arr_y, block_size)
Example #26
class PSumGLB(Module):
    def instantiate(self, dram_wr_chn, noc_wr_chn, rd_chn, glb_depth, chn_per_word):
        self.dram_wr_chn = dram_wr_chn
        self.noc_wr_chn = noc_wr_chn
        self.rd_chn = rd_chn
        self.chn_per_word = chn_per_word
        self.name = 'psum_glb'

        self.stat_type = 'show'
        self.raw_stats = {'size' : (glb_depth, chn_per_word), 'psum_glb_rd': 0, 'psum_glb_wr': 0}

        self.sram = SRAM(glb_depth, chn_per_word, nports=2)
        self.last_read = Channel(3)

        self.filter_size = (0, 0)
        self.fmap_sets = 0
        self.fmap_per_iteration = 0

        self.rd_set = 0
        self.fmap_rd_idx = 0
        self.iteration = 0

        self.wr_set = 0
        self.fmap_wr_idx = 0
        self.wr_done = False

    def configure(self, filter_size, fmap_sets, fmap_per_iteration):
        self.wr_done = False

        self.filter_size = filter_size
        self.fmap_sets = fmap_sets
        self.fmap_per_iteration = fmap_per_iteration

        self.rd_set = 0
        self.fmap_rd_idx = 0
        self.iteration = 0

        self.wr_set = 0
        self.fmap_wr_idx = 0
        self.wr_done = False

    def tick(self):
        num_iteration = self.filter_size[0]*self.filter_size[1]

        if not self.wr_done:
            # Write to GLB
            if self.dram_wr_chn.valid():
                data = self.dram_wr_chn.pop()
                self.raw_stats['psum_glb_wr'] += len(data)
                # print "psum_glb wr"
                # Write incoming psums to the GLB
                addr = self.fmap_sets*self.fmap_wr_idx + self.wr_set
                self.wr_set += 1
                self.sram.request(WR, addr, data, port=1)
                if self.wr_set == self.fmap_sets:
                    self.wr_set = 0
                    self.fmap_wr_idx += 1
                if self.fmap_wr_idx == self.fmap_per_iteration:
                    # Done initializing psums from DRAM
                    # self.sram.dump()
                    self.fmap_wr_idx = 0
                    self.wr_done = True
                #print ("psum orig write, fmap_sets, fmap_wr_idx, wr_set, addr, data: ",self.fmap_sets, self.fmap_wr_idx, self.wr_set, addr, data)
        else:
            # Read from GLB and deal with SRAM latency
            # print self.rd_chn.vacancy(1), self.rd_chn.rd_ptr.rd(), self.rd_chn.wr_ptr.rd()
            if self.rd_chn.vacancy(1) and self.iteration < num_iteration:
                addr = self.fmap_sets*self.fmap_rd_idx + self.rd_set
                #print("psum req glb", self.iteration, self.fmap_rd_idx, self.rd_set)
                self.sram.request(RD, addr, port=0)
                self.last_read.push(False)
                self.rd_set += 1

                if self.rd_set == self.fmap_sets:
                    self.rd_set = 0
                    self.fmap_rd_idx += 1
                if self.fmap_rd_idx == self.fmap_per_iteration:
                    self.fmap_rd_idx = 0
                    self.iteration += 1

            # Process the last read sent to the GLB SRAM
            if self.last_read.valid():
                is_zero = self.last_read.pop()
                data = [0]*self.chn_per_word if is_zero else \
                        [e for e in self.sram.response()]
                self.rd_chn.push(data)
                self.raw_stats['psum_glb_rd'] += len(data)
                #print("psum rd glb: data", data)

            if self.noc_wr_chn.valid():
                data = self.noc_wr_chn.pop()
                #print("psum_to_glb: ", self.fmap_wr_idx, self.wr_set, data)

                self.raw_stats['psum_glb_wr'] += len(data)
                addr = self.fmap_sets*self.fmap_wr_idx + self.wr_set
                #print("noc psum wr glb", self.fmap_wr_idx, self.wr_set, data)
                self.wr_set += 1
                self.sram.request(WR, addr, data, port=1)
                if self.wr_set == self.fmap_sets:
                    self.wr_set = 0
                    self.fmap_wr_idx += 1
                if self.fmap_wr_idx == self.fmap_per_iteration:
                    # Done writing back psums for this pass
                    #self.sram.dump()
                    self.fmap_wr_idx = 0
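
# Framework-free sketch of the read-latency pattern used above: every SRAM
# read request pushes a token into the small 'last_read' Channel, and the
# response is consumed on a later tick, decoupling request from response.
# Class and variable names here are illustrative, not part of the source.
from collections import deque

class OneCycleSRAM:
    def __init__(self, depth, width):
        self.mem = [[0] * width for _ in range(depth)]
        self.pending = deque()           # stands in for the last_read Channel

    def request_rd(self, addr):
        self.pending.append(self.mem[addr])   # issue the read this tick

    def response(self):
        return self.pending.popleft()         # consume it on a later tick

sram = OneCycleSRAM(depth=4, width=4)
sram.mem[1] = [5, 6, 7, 8]
sram.request_rd(1)         # tick N: request
print(sram.response())     # tick N+1: [5, 6, 7, 8]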
Exemple #27
0
class IFMapGLB(Module):
    def instantiate(self, wr_chn, rd_chn, glb_depth, chn_per_word):
        self.wr_chn = wr_chn
        self.rd_chn = rd_chn
        self.chn_per_word = chn_per_word
        self.glb_depth = glb_depth
        self.name = 'ifmap_glb'

        self.stat_type = 'show'
        self.raw_stats = {
            'size': (glb_depth, chn_per_word),
            'ifmap_glb_rd': 0,
            'ifmap_glb_wr': 0
        }

        self.sram = SRAM(glb_depth, chn_per_word)
        self.last_read = Channel(3)

        self.image_size = (0, 0)
        self.filter_size = (0, 0)
        self.fmap_sets = 0
        self.fmap_per_iteration = 0

        self.curr_set = 0
        self.fmap_idx = 0
        self.iteration = 0
        self.wr_done = False

    def configure(self, image_size, filter_size, fmap_sets,
                  fmap_per_iteration):
        self.wr_done = False

        self.image_size = image_size
        self.filter_size = filter_size
        self.fmap_sets = fmap_sets
        self.fmap_per_iteration = fmap_per_iteration
        self.curr_tile = 0
        self.num_tiles = 4
        self.addr = 0
        print("ifmap glb_size: ", self.glb_depth)

    def tick(self):
        num_iteration = self.filter_size[0] * self.filter_size[1]
        offset_x = (self.filter_size[0] - 1) // 2
        offset_y = (self.filter_size[1] - 1) // 2
        filter_x = self.iteration % self.filter_size[0] - offset_x
        filter_y = self.iteration // self.filter_size[0] - offset_y

        if not self.wr_done:
            # Write to GLB
            if self.wr_chn.valid():
                data = self.wr_chn.pop()
                self.raw_stats['ifmap_glb_wr'] += len(data)
                # print "ifmap_glb wr"
                # Write ifmap to glb
                addr = self.fmap_sets * self.curr_tile + self.curr_set + self.fmap_idx * self.num_tiles
                #print ("ifmap_to_glb: ", self.curr_tile, self.fmap_idx, addr)
                self.curr_set += 1
                self.sram.request(WR, addr, data)
                if self.curr_set == self.fmap_sets:
                    self.curr_set = 0
                    self.curr_tile += 1
                if self.curr_tile == self.num_tiles:
                    # Finished all tiles for this fmap index; move on
                    # self.sram.dump()
                    self.curr_tile = 0
                    self.fmap_idx += 1
                if self.fmap_idx == self.fmap_per_iteration:
                    # Done writing ifmaps into the GLB
                    self.wr_done = True
        else:
            if self.rd_chn.vacancy(1) and self.addr < self.glb_depth:
                # Read from GLB and deal with SRAM latency
                self.sram.request(RD, self.addr)
                #print ("read_ifmap_glb: ", self.addr)
                self.addr += 1
                self.last_read.push(False)

            # Process the last read sent to the GLB SRAM
            if self.last_read.valid():
                #print ("ifmap_glb_to_noc")
                is_zero = self.last_read.pop()
                data = [e for e in self.sram.response()]
                # print "ifmap rd glb", data
                self.rd_chn.push(data)
                self.raw_stats['ifmap_glb_rd'] += len(data)
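
# Hedged sketch of the tiled write-address layout used by this IFMapGLB
# (addr = fmap_sets*curr_tile + curr_set + fmap_idx*num_tiles). With
# fmap_sets == 1 this interleaves the tiles within each fmap index; the
# illustrative helper below just enumerates that mapping for inspection.
def tiled_write_addrs(fmap_per_iteration, num_tiles, fmap_sets=1):
    for fmap_idx in range(fmap_per_iteration):
        for curr_tile in range(num_tiles):
            for curr_set in range(fmap_sets):
                yield fmap_sets*curr_tile + curr_set + fmap_idx*num_tiles

print(list(tiled_write_addrs(2, 4)))  # [0, 1, 2, 3, 4, 5, 6, 7]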
Exemple #28
0
class PSumGLB(Module):
    def instantiate(self, dram_wr_chn, noc_wr_chn, rd_chn, glb_depth,
                    chn_per_word):
        self.dram_wr_chn = dram_wr_chn
        self.noc_wr_chn = noc_wr_chn
        self.rd_chn = rd_chn
        self.chn_per_word = chn_per_word
        self.name = 'psum_glb'

        self.stat_type = 'show'
        self.raw_stats = {'size': (glb_depth, chn_per_word), 'rd': 0, 'wr': 0}

        self.sram = SRAM(glb_depth, chn_per_word, nports=2, name=self.name)
        self.last_read = Channel(3, name='last_read')

        self.filter_size = (0, 0)
        self.fmap_sets = 0
        self.fmap_per_iteration = 0

        self.rd_set = 0
        self.fmap_rd_idx = 0
        self.iteration = 0

        self.wr_set = 0
        self.fmap_wr_idx = 0
        self.wr_done = False

    def configure(self, filter_size, fmap_sets, fmap_per_iteration):
        self.wr_done = False

        self.filter_size = filter_size
        self.fmap_sets = fmap_sets
        self.fmap_per_iteration = fmap_per_iteration

        self.rd_set = 0
        self.fmap_rd_idx = 0
        self.iteration = 0

        self.wr_set = 0
        self.fmap_wr_idx = 0
        self.wr_done = False

    def tick(self):
        num_iteration = self.filter_size[0] * self.filter_size[1]

        if not self.wr_done:
            # Write to GLB
            if self.dram_wr_chn.valid():
                data = self.dram_wr_chn.pop()
                self.raw_stats['wr'] += 1
                # print "psum_glb wr"
                # Write incoming psums to the GLB
                # print "ifmap_to_glb: ", in_sets, self.fmap_idx, self.curr_set
                addr = self.fmap_sets * self.fmap_wr_idx + self.wr_set
                self.wr_set += 1
                self.sram.request(WR, addr, data, port=0)
                if self.wr_set == self.fmap_sets:
                    self.wr_set = 0
                    self.fmap_wr_idx += 1
                if self.fmap_wr_idx == self.fmap_per_iteration:
                    # Done initializing psums from DRAM
                    # self.sram.dump()
                    self.fmap_wr_idx = 0
                    self.wr_done = True
        else:
            # Read from GLB and deal with SRAM latency
            # print self.rd_chn.vacancy(1), self.rd_chn.rd_ptr.rd(), self.rd_chn.wr_ptr.rd()
            if self.rd_chn.vacancy(1) and self.iteration < num_iteration:
                addr = self.fmap_sets * self.fmap_rd_idx + self.rd_set
                # print "psum req glb", self.iteration, self.fmap_rd_idx, self.rd_set
                self.sram.request(RD, addr, port=0)
                self.last_read.push(False)
                self.rd_set += 1

                if self.rd_set == self.fmap_sets:
                    self.rd_set = 0
                    self.fmap_rd_idx += 1
                if self.fmap_rd_idx == self.fmap_per_iteration:
                    self.fmap_rd_idx = 0
                    self.iteration += 1

            # Process the last read sent to the GLB SRAM
            if self.last_read.valid():
                is_zero = self.last_read.pop()
                data = [0]*self.chn_per_word if is_zero else \
                        [e for e in self.sram.response()]
                self.rd_chn.push(data)
                self.raw_stats['rd'] += 1
                # print "psum rd glb", data

            # If we can pull an element off of the write channel, do it
            # and write it into the location specified by the current
            # fmap_sets, fmap_wr_idx, and wr_set.
            if self.noc_wr_chn.valid():
                # print "psum_to_glb: ", self.fmap_wr_idx, self.wr_set
                data = self.noc_wr_chn.pop()
                self.raw_stats['wr'] += 1
                addr = self.fmap_sets * self.fmap_wr_idx + self.wr_set
                # print "psum wr glb", self.fmap_wr_idx, self.wr_set, data
                self.wr_set += 1
                self.sram.request(WR, addr, data, port=1)
                if self.wr_set == self.fmap_sets:
                    self.wr_set = 0
                    self.fmap_wr_idx += 1
                if self.fmap_wr_idx == self.fmap_per_iteration:
                    # Done writing back psums for this pass
                    # self.sram.dump()
                    self.fmap_wr_idx = 0
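
# Observation on the port discipline in this PSumGLB, written as a minimal
# two-port SRAM sketch: the DRAM preload uses port 0 strictly before
# wr_done, psum reads use port 0 only after wr_done, and the NoC write-back
# uses port 1 so it can overlap the reads. Names here are illustrative.
class TwoPortSRAM:
    def __init__(self, depth, width):
        self.mem = [[0] * width for _ in range(depth)]
        self.used = set()                 # ports already requested this tick

    def request(self, op, addr, data=None, port=0):
        assert port not in self.used, "one request per port per tick"
        self.used.add(port)
        if op == 'WR':
            self.mem[addr] = list(data)
        else:
            return list(self.mem[addr])

    def tick(self):
        self.used.clear()

glb = TwoPortSRAM(depth=8, width=4)
glb.request('WR', 0, [1, 2, 3, 4], port=1)   # NoC write-back on port 1
psum = glb.request('RD', 1, port=0)          # psum read on port 0, same tick
glb.tick()                                   # next tick: both ports free again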
Exemple #29
0
    def instantiate(self, arr_x, arr_y,
            input_chn, output_chn,
            chn_per_word,
            ifmap_glb_depth):
        # PE static configuration (immutable)
        self.name = 'chip'
        self.arr_x = arr_x
        self.arr_y = arr_y
        self.chn_per_word = chn_per_word

        self.post_tr_x = arr_x # num output channels = 8
        self.post_tr_y = 4 # num tiles = 4

        self.pre_tr_ifmap_x = arr_y # num input channels = 4
        self.pre_tr_ifmap_y = 4 # num tiles = 4

        self.pre_tr_weights_x = arr_y # num input channels = 4
        self.pre_tr_weights_y = arr_x # num output channels = 8

        self.stat_type = 'show'

        # Instantiate DRAM IO channels
        self.input_chn = input_chn
        self.output_chn = output_chn

        # Instantiate input deserializer and output serializer
        self.ifmap_wr_chn = Channel()
        self.weights_wr_chn = Channel()
        self.bias_wr_chn = Channel()
        self.deserializer = InputDeserializer(self.input_chn, self.ifmap_wr_chn,
                self.weights_wr_chn, self.bias_wr_chn, arr_x, arr_y,
                chn_per_word)

        self.psum_output_chn = Channel()
        self.serializer = OutputSerializer(self.output_chn, self.psum_output_chn)

        # Instantiate GLB and GLB channels
        self.ifmap_glb_wr_chn = Channel(3)
        self.ifmap_rd_chn = Channel(3)
        self.ifmap_glb = IFMapGLB(self.ifmap_glb_wr_chn, self.ifmap_rd_chn,
                ifmap_glb_depth, chn_per_word)

        self.psum_rd_chn = Channel(3)
        self.psum_noc_wr_chn = Channel()
        #  self.psum_glb = PSumGLB(self.psum_wr_chn, self.psum_noc_wr_chn, self.psum_rd_chn,
        #          psum_glb_depth, chn_per_word)

        self.weights_glb_wr_chn = Channel(3)
        self.weights_rd_chn = Channel()
        self.weights_glb = WeightsGLB(self.weights_glb_wr_chn, self.weights_rd_chn)

        self.bias_rd_chn = Channel()
        self.bias_glb = BiasGLB(self.bias_wr_chn, self.bias_rd_chn)

        # PE Array and local channel declaration
        self.pe_array = ModuleList()
        self.pe_ifmap_chns = ModuleList()
        self.pe_filter_chns = ModuleList()
        self.pe_psum_chns = ModuleList()
        self.pe_psum_chns.append(ModuleList())
        for x in range(self.arr_x):
            self.pe_psum_chns[0].append(Channel(32))

        # Actual PE array instantiation
        for y in range(self.arr_y):
            self.pe_array.append(ModuleList())
            self.pe_ifmap_chns.append(ModuleList())
            self.pe_filter_chns.append(ModuleList())
            self.pe_psum_chns.append(ModuleList())
            for x in range(self.arr_x):
                self.pe_ifmap_chns[y].append(Channel(32))
                self.pe_filter_chns[y].append(Channel(32))
                self.pe_psum_chns[y+1].append(Channel(32))
                self.pe_array[y].append(
                    PE(x, y,
                        self.pe_ifmap_chns[y][x],
                        self.pe_filter_chns[y][x],
                        self.pe_psum_chns[y][x],
                        self.pe_psum_chns[y+1][x]
                    )
                )

        # Pre Transform IFMap array and local channel declaration
        self.pre_tr_ifmap_array = ModuleList()
        self.pre_tr_ifmap_in_chns = ModuleList()
        self.pre_tr_ifmap_out_chns = ModuleList()

        # Actual pre transform IFMap array instantiation
        for y in range(self.pre_tr_ifmap_y):
            self.pre_tr_ifmap_array.append(ModuleList())
            self.pre_tr_ifmap_in_chns.append(ModuleList())
            self.pre_tr_ifmap_out_chns.append(ModuleList())
            for x in range(self.pre_tr_ifmap_x):
                self.pre_tr_ifmap_in_chns[y].append(Channel(32))
                self.pre_tr_ifmap_out_chns[y].append(Channel(32))
                self.pre_tr_ifmap_array[y].append(
                    PreTransformIFMap(x, y,
                        self.pre_tr_ifmap_in_chns[y][x],
                        self.pre_tr_ifmap_out_chns[y][x]
                        )
                )

        # Pre Transform Weight array and local channel declaration
        self.pre_tr_weights_array = ModuleList()
        self.pre_tr_weights_in_chns = ModuleList()
        self.pre_tr_weights_out_chns = ModuleList()

        # Actual pre transform Weight array instantiation
        for y in range(self.pre_tr_weights_y):
            self.pre_tr_weights_array.append(ModuleList())
            self.pre_tr_weights_in_chns.append(ModuleList())
            self.pre_tr_weights_out_chns.append(ModuleList())
            for x in range(self.pre_tr_weights_x):
                self.pre_tr_weights_in_chns[y].append(Channel(32))
                self.pre_tr_weights_out_chns[y].append(Channel(32))
                self.pre_tr_weights_array[y].append(
                    PreTransformWeights(x, y,
                        self.pre_tr_weights_in_chns[y][x],
                        self.pre_tr_weights_out_chns[y][x]
                        )
                )

        # Post Transform Array and local channel declaration
        self.post_tr_array = ModuleList()
        self.post_tr_bias_chns = ModuleList()
        self.post_tr_ofmap_in_chns = ModuleList()
        self.post_tr_ofmap_out_chns = ModuleList()

        # Actual post transform array instantiation
        for y in range(self.post_tr_y):
            self.post_tr_array.append(ModuleList())
            self.post_tr_bias_chns.append(ModuleList())
            self.post_tr_ofmap_in_chns.append(ModuleList())
            self.post_tr_ofmap_out_chns.append(ModuleList())
            for x in range(self.post_tr_x):
                self.post_tr_bias_chns[y].append(Channel(32))
                self.post_tr_ofmap_in_chns[y].append(Channel(32))
                self.post_tr_ofmap_out_chns[y].append(Channel(32))
                self.post_tr_array[y].append(
                    PostTransform(x, y,
                        self.post_tr_bias_chns[y][x],
                        self.post_tr_ofmap_in_chns[y][x],
                        self.post_tr_ofmap_out_chns[y][x]
                        )
                )

        # Setup NoC to deliver weights, ifmaps and psums
        self.filter_noc = WeightsNoC(self.weights_rd_chn, self.pe_filter_chns, self.chn_per_word)
        self.ifmap_noc = IFMapNoC(self.ifmap_rd_chn, self.pe_ifmap_chns, self.arr_x, self.chn_per_word)
        self.psum_rd_noc = PSumRdNoC(self.pe_psum_chns[0], self.chn_per_word)
        #self.psum_wr_noc = PSumWrNoC(self.pe_psum_chns[-1], self.psum_output_chn, self.chn_per_word)
        self.bias_noc = BiasNoC(self.bias_rd_chn, self.post_tr_bias_chns, self.chn_per_word)

        # Setup NoC for post transform blocks
        self.post_tr_wr_noc = PostTrWrNoC(self.pe_psum_chns[-1], self.post_tr_ofmap_in_chns, self.chn_per_word)
        self.post_tr_rd_noc = PostTrRdNoC(self.post_tr_ofmap_out_chns, self.psum_output_chn, self.chn_per_word)

        # Instantiate tiler for ifmaps
        self.ifmap_tiler = IFMapTiler(self.ifmap_wr_chn, self.pre_tr_ifmap_in_chns, self.chn_per_word)

        # Setup NoC for pre transform blocks
        #self.pre_tr_ifmap_wr_noc = PreTrIFMapWrNoC(self.ifmap_wr_chn, self.pre_tr_ifmap_in_chns, self.chn_per_word)
        self.pre_tr_ifmap_rd_noc = PreTrIFMapRdNoC(self.pre_tr_ifmap_out_chns, self.ifmap_glb_wr_chn, self.chn_per_word)
        self.pre_tr_weights_wr_noc = PreTrWeightsWrNoC(self.weights_wr_chn, self.pre_tr_weights_in_chns, self.chn_per_word)
        self.pre_tr_weights_rd_noc = PreTrWeightsRdNoC(self.pre_tr_weights_out_chns, self.weights_glb_wr_chn, self.chn_per_word)
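
# Framework-free sketch of the psum chain wired above: column x of the PE
# array forms a pipeline in y, where each PE pops a psum from
# pe_psum_chns[y][x], adds its ifmap*filter product, and pushes the result
# to pe_psum_chns[y+1][x]. The function below models one column's pass.
def column_psum(ifmaps, filters, psum_in=0):
    acc = psum_in
    for a, w in zip(ifmaps, filters):   # one step per PE row (y)
        acc += a * w
    return acc

print(column_psum([1, 2, 3, 4], [5, 6, 7, 8]))  # 5 + 12 + 21 + 32 = 70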
Exemple #30
0
class IFMapGLB(Module):
    def instantiate(self, wr_chn, rd_chn, glb_depth, chn_per_word):
        self.wr_chn = wr_chn
        self.rd_chn = rd_chn
        self.chn_per_word = chn_per_word
        self.name = 'ifmap_glb'

        self.stat_type = 'show'
        self.raw_stats = {'size' : (glb_depth, chn_per_word), 'rd': 0, 'wr': 0}

        self.sram = SRAM(glb_depth, chn_per_word, name=self.name)
        self.last_read = Channel(3, name='last_read')

        self.image_size = (0, 0)
        self.filter_size = (0, 0)
        self.fmap_sets = 0
        self.full_fmap_sets = 0
        self.fmap_per_iteration = 0

        self.curr_set = 0
        self.fmap_idx = 0
        self.iteration = 0
        self.tile_in = 0
        self.tile_out = 0
        self.wr_done = False
        self.task_done = True

    def configure(self, image_size, filter_size, fmap_sets, full_fmap_sets, tiles_out, fmap_per_iteration):
        self.wr_done = False
        self.curr_set = 0
        self.fmap_idx = 0
        self.iteration = 0

        self.image_size = image_size
        self.filter_size = filter_size
        self.fmap_sets = fmap_sets
        self.full_fmap_sets = full_fmap_sets
        self.fmap_per_iteration = fmap_per_iteration
        self.tiles_out = tiles_out
        self.tile_in = 0
        self.tile_out = 0
        self.task_done = False

    def tick(self):
        # Bail out before touching filter_size/fmap_sets: both are zero
        # until configure() runs, and the modulo/divide below would raise.
        if self.task_done:
            return

        num_iteration = self.filter_size[0]*self.filter_size[1]
        offset_x = (self.filter_size[0] - 1)//2
        offset_y = (self.filter_size[1] - 1)//2
        filter_x = self.iteration % self.filter_size[0] - offset_x
        filter_y = self.iteration // self.filter_size[0] - offset_y
        tiles_in = self.full_fmap_sets // self.fmap_sets

        if not self.wr_done:
            # Write to GLB
            if self.wr_chn.valid():
                data = self.wr_chn.pop()
                # print "ifmap_glb wr"
                # Write ifmap to glb
                # print "ifmap_to_glb: ", in_sets, self.fmap_idx, self.curr_set
                addr = self.full_fmap_sets*self.fmap_idx + self.curr_set
                self.curr_set += 1
                self.sram.request(WR, addr, data)
                self.raw_stats['wr'] += len(data)
                if self.curr_set == self.full_fmap_sets:
                    self.curr_set = 0
                    self.fmap_idx += 1
                if self.fmap_idx == self.fmap_per_iteration:
                    # Done writing ifmaps into the GLB
                    # self.sram.dump()
                    self.fmap_idx = 0
                    self.wr_done = True
        else:
            did_read = False
            # Read from GLB and deal with SRAM latency
            if self.rd_chn.vacancy(1) and self.iteration < num_iteration and self.tile_in < tiles_in:
                fmap_x = self.fmap_idx % self.image_size[0]
                fmap_y = self.fmap_idx // self.image_size[0]
                ifmap_x, ifmap_y = (fmap_x + filter_x, fmap_y + filter_y)
                if (ifmap_x < 0) or (ifmap_x >= self.image_size[0]) or \
                        (ifmap_y < 0) or (ifmap_y >= self.image_size[1]):
                    # print "ifmap req zero", self.iteration, self.fmap_idx
                    self.last_read.push(True)
                else:
                    fmap_idx = (ifmap_y*self.image_size[0]) + ifmap_x
                    addr = self.fmap_sets*(fmap_idx*tiles_in+self.tile_in) + self.curr_set
                    # print "ifmap req glb", self.iteration, self.fmap_idx
                    self.sram.request(RD, addr)
                    self.raw_stats['rd'] += self.chn_per_word
                    self.last_read.push(False)
                did_read = True
                self.curr_set += 1

                if self.curr_set == self.fmap_sets:
                    self.curr_set = 0
                    self.fmap_idx += 1
                if self.fmap_idx == self.fmap_per_iteration:
                    self.fmap_idx = 0
                    self.iteration += 1

            # Process the last read sent to the GLB SRAM
            if self.last_read.valid():
                is_zero = self.last_read.pop()
                data = [0]*self.chn_per_word if is_zero else \
                        [e for e in self.sram.response()]
                # print "ifmap rd glb", data
                self.rd_chn.push(data)
            elif not did_read:
                if self.iteration == num_iteration:
                    self.iteration = 0
                    self.tile_in += 1
                    if self.tile_in == tiles_in:
                        self.tile_in = 0
                        self.tile_out += 1
                        if self.tile_out == self.tiles_out:
                            self.tile_out = 0
                            self.task_done = True
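
# Framework-free sketch of the halo handling above: a read whose shifted
# coordinate falls outside the image pushes a True ("zero") token instead
# of an SRAM request, and the response stage substitutes a zero vector.
# Helper and counts below are illustrative, derived from the code's math.
def halo_reads(image_size, filter_size):
    offset_x = (filter_size[0] - 1)//2
    offset_y = (filter_size[1] - 1)//2
    for it in range(filter_size[0]*filter_size[1]):
        filter_x = it % filter_size[0] - offset_x
        filter_y = it // filter_size[0] - offset_y
        for idx in range(image_size[0]*image_size[1]):
            x = idx % image_size[0] + filter_x
            y = idx // image_size[0] + filter_y
            yield 0 <= x < image_size[0] and 0 <= y < image_size[1]

zeros = sum(1 for ok in halo_reads((4, 4), (3, 3)) if not ok)
print("padding reads:", zeros)  # 44 of the 144 requests fall in the halo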