Esempio n. 1
0
    def test_prod(self):
        ''' The factors of each factorization multiply back to the total. '''
        for total in (24, 1024):
            for factors in util.factorize(total, 3):
                self.assertEqual(util.prod(factors), total)
    def _make_data_layout(self, nfm, hfm, wfm, origin, nums, dims):
        '''
        Build a DataLayout whose fmap range is split across nodes.

        `nums` gives the number of partitions along each of the four
        dimensions of `(self.batch_size, nfm, hfm, wfm)`; their product must
        equal `dims.size()`. Each partition is mapped to a single node
        coordinate relative to `origin`.
        '''
        assert util.prod(nums) == dims.size()

        fmap_sizes = (self.batch_size, nfm, hfm, wfm)

        # Partition indices are flattened in the order of n, b, w, h, which
        # are positions 1, 0, 3, 2 of the index tuple.
        flat_order = (1, 0, 3, 2)

        def _node_coord(part_idxs):
            ''' Map partition indices to a 2D node coordinate. '''
            flat = 0
            for pos in flat_order:
                flat = flat * nums[pos] + part_idxs[pos]
            assert flat < dims.size()
            # Row-major placement with `dims.w` nodes per row.
            row, col = divmod(flat, dims.w)
            return PhyDim2(row, col)

        frmap = FmapRangeMap()

        for part_idxs in itertools.product(*(range(cnt) for cnt in nums)):
            # Split each dimension as evenly as integer division allows.
            begs = []
            ends = []
            for idx, cnt, size in zip(part_idxs, nums, fmap_sizes):
                begs.append(idx * size // cnt)
                ends.append((idx + 1) * size // cnt)

            frmap.add(FmapRange(begs, ends), (_node_coord(part_idxs), ))

        layout = DataLayout(frmap=frmap, origin=origin, type=NodeRegion.DATA)
        # The partitions together must cover the entire fmap range.
        assert layout.frmap.complete_fmap_range().size() \
                == util.prod(fmap_sizes)

        return layout
    def test_scheme_dict(self):
        ''' The dict from get_scheme_dict mirrors the scheme's own stats. '''

        # Blocking factor entries and the loops they must multiply up to.
        blocking_keys = (('ti', le.IFM), ('to', le.OFM), ('tb', le.BAT))

        for bl_ts, bl_ords in self._gen_loopblocking_all():

            lbs = self._lbs(bl_ts, bl_ords, part_occ=self.part_occ)

            if not lbs.is_valid():
                # An invalid scheme has no dict.
                self.assertIsNone(lbs.get_scheme_dict(self.cost))
                continue

            sdict = lbs.get_scheme_dict(self.cost)

            self.assertAlmostEqual(sdict['cost'], lbs.get_cost(self.cost))
            self.assertAlmostEqual(sdict['ops'], lbs.ops)
            self.assertAlmostEqual(sdict['time'], lbs.time)

            # The access entry must be the very same object, not a copy.
            self.assertIs(sdict['access'], lbs.get_access())

            for lvl in (0, 1):
                sizes_at_lvl = sdict['size'][lvl]
                for dce in range(de.NUM):
                    self.assertAlmostEqual(sizes_at_lvl[dce],
                                           lbs.data_size(lvl, dce))

            self.assertAlmostEqual(sdict['part_occ'], self.part_occ)

            # Blocking factors of each loop multiply to the loop count.
            for key, lpe in blocking_keys:
                self.assertEqual(util.prod(sdict[key]),
                                 self.nld['BASE'].loopcnt[lpe])
        def do_access(self, idx_pr, cnt_pr, read=1, write=0):
            '''
            Access the buffer at unit index `idx_pr` with unit count `cnt_pr`
            (both given for all dimensions), `read` and/or `write` times.

            Return the count of data, for all dimensions, that must be
            accessed from the next level: `cnt_pr` itself if this buffer is
            bypassed, all zeros on a hit, and the buffered count on a miss.
            '''
            if self.bypass:
                # A bypassed buffer records nothing and relays the request.
                return cnt_pr

            # Index of the buffered range that contains the accessed unit.
            range_idx = self._range_idx_pr(idx_pr)

            # Every access is counted, regardless of hit or miss.
            self.access += util.prod(cnt_pr) * (read + write)

            hit = (range_idx == self.data)
            if not hit:
                # Miss: the buffer now holds the new range, which has to be
                # brought in from the next level.
                self.data = range_idx
                return self.buf_cnt_pr

            # Hit: nothing is needed from the next level.
            return (0, 0)
 def _make_bl_ts(self, ti_part, to_part, tb_part, wlkey='BASE'):
     '''
     Make a set of blocking factors. `ti_part`, `to_part`, `tb_part` can
     contain one 0 value to be filled.

     The 0 placeholder of a loop is replaced by `util.idivc` of that loop's
     total count in `self.nld[wlkey]` over the product of the other given
     factors. A part without any 0 is used as is.

     Return the factors transposed with `zip`, i.e., one tuple per position
     that holds the factors of all loops at that position.
     '''

     def _fill(part, lpe):
         ''' Fill the 0 placeholder of `part`, if any, for loop `lpe`. '''
         try:
             idx = part.index(0)
         except ValueError:
             # No placeholder, nothing to fill.
             return part
         others = util.prod(part[:idx] + part[idx + 1:])
         return [
             util.idivc(self.nld[wlkey].loopcnt[lpe], others)
             if x == idx else part[x]
             for x in range(3)
         ]

     lp_ts = [None] * le.NUM
     lp_ts[le.IFM] = _fill(ti_part, le.IFM)
     lp_ts[le.OFM] = _fill(to_part, le.OFM)
     lp_ts[le.BAT] = _fill(tb_part, le.BAT)
     return tuple(zip(*lp_ts))
Esempio n. 6
0
    def test_int(self):
        ''' Products of integers, taken from several kinds of iterables. '''
        self.assertIsInstance(util.prod([3, 5, 7]), int)

        # Lists, including negative and zero factors.
        list_cases = (
            ([3, 5, 7], 105),
            ([3, 5, -1], -15),
            ([3, -5, 7], -105),
            ([3, -5, 0], 0),
        )
        for factors, expected in list_cases:
            self.assertEqual(util.prod(factors), expected)

        # Tuple, set, and dict (which iterates over its keys).
        for container in ((3, 5, 7), {3, 5, 7}, {3: 'a', 5: 'b', 7: 'c'}):
            self.assertEqual(util.prod(container), 105)
        def _init_sub_range(self, lp_t_list, dim_loops):

            assert len(dim_loops) == 2

            subrng_list = [(0, 0)]
            subrng_sz_pr = [1, 1]

            # From inner to outer.
            for lpe, t in reversed(lp_t_list):
                # The data dimension index of this loop.
                try:
                    d = dim_loops.index(lpe)
                except ValueError:
                    # This loop is not related to the data, skip.
                    assert lpe not in dim_loops
                    continue

                # Size of this dimension of current loop body, i.e., all inner
                # loops.
                s = subrng_sz_pr[d]

                # Make the new subrange list, by looping over the current loop
                # body with the current loop factor, and updating this
                # dimension.
                new_subrng_list = []
                for i in range(t):
                    new_subrng_list += [
                        tuple(i_ + i * s if d_ == d else i_
                              for d_, i_ in enumerate(sr))
                        for sr in subrng_list
                    ]
                subrng_list = new_subrng_list

                # Update size of this dimension.
                subrng_sz_pr[d] *= t

                # Check.
                assert len(set(subrng_list)) == len(subrng_list)
                assert len(subrng_list) == util.prod(subrng_sz_pr)

            subrng_cnt_pr = tuple(
                buf_cnt // subrng_sz
                for buf_cnt, subrng_sz in zip(self.buf_cnt_pr, subrng_sz_pr))

            return subrng_list, subrng_cnt_pr
    def _sim_access_conv(self, lbs, get_bufshr=False):
        '''
        Get data access by actually simulating and generating loops for CONV
        layer.

        Every index generated by `lbs` is pushed through a simulated
        REGF -> GBUF -> DRAM buffer hierarchy for each data category; a level
        is only consulted when the level before it requests data.

        Return `(dram_access, gbuf_access)`, each a list indexed by data
        category and scaled by the number of nodes (the DRAM access is
        further divided by `lbs.accfwd_reduction`).

        If `get_bufshr` is True, also return bufshr stats as a third element
        `(rotation_access, wide_fetch_access, rotation_rounds)`. Otherwise
        all bufshr stats are asserted to be zero.
        '''
        self.assertTrue(lbs.is_valid(), '_sim_access_conv: invalid lbs.')

        data_loops = lbs.nld.data_loops

        # Blocking factors of all levels, indexed by loop.
        lpts = tuple(zip(*lbs.bl_ts))

        subgrp_size, rot_unit_cnt, lp_t_list = self._bufshr_params(lbs)

        def _buf_cnt_pr_list(first_lvl):
            '''
            Buffered unit counts of all data categories, from the product of
            the blocking factors at `first_lvl` and all levels after it.
            '''
            return [
                tuple(util.prod(lpts[lpe][first_lvl:])
                      for lpe in data_loops[dce].loops())
                for dce in range(de.NUM)
            ]

        # Get buffered unit counts at each level.
        dram_buf_cnt_pr_list = _buf_cnt_pr_list(0)
        gbuf_buf_cnt_pr_list = _buf_cnt_pr_list(1)
        regf_buf_cnt_pr_list = _buf_cnt_pr_list(2)

        # Initialize SimBuffer.
        # DRAM uses the GBUF unit access for data not stored in GBUF.
        drams = [
            self._SimBuffer(
                dce,
                buf_cnt_pr,
                lbs.nld.unit_access[me.DRAM][dce] if lbs.stored_in_gbuf[dce]
                else lbs.nld.unit_access[me.GBUF][dce],
            )
            for dce, buf_cnt_pr in enumerate(dram_buf_cnt_pr_list)
        ]
        gbufs = [
            self._SimBufferSharing(
                dce,
                buf_cnt_pr,
                lbs.nld.unit_access[me.GBUF][dce],
                subgrp_size[dce],
                rot_unit_cnt[dce],
                lp_t_list,
                data_loops[dce].loops(),
                bypass=(not lbs.stored_in_gbuf[dce]))
            for dce, buf_cnt_pr in enumerate(gbuf_buf_cnt_pr_list)
        ]
        regfs = [
            self._SimBuffer(
                dce,
                buf_cnt_pr,
                lbs.nld.unit_access[me.REGF][dce],
            )
            for dce, buf_cnt_pr in enumerate(regf_buf_cnt_pr_list)
        ]

        # Already generated psum for OFM.
        ofm_psum = set()

        # Simulation.
        for idx_tuple in lbs.gen_index():

            for dce in range(de.NUM):

                idx_pr = tuple(data_loops[dce].take(idx_tuple))

                if dce == de.OFM:
                    # Fetch and writeback, unless for the first time (no fetch).
                    write = 1
                    read = 1 if idx_pr in ofm_psum else 0
                    ofm_psum.add(idx_pr)
                else:
                    read = 1
                    write = 0

                # PE.
                cnt_pr = (1, 1)

                # REGF, then GBUF, then DRAM. Stop at the first level that
                # requests nothing from the next one.
                for bufs in (regfs, gbufs, drams):
                    cnt_pr = bufs[dce].do_access(idx_pr, cnt_pr, read, write)
                    if not any(cnt_pr):
                        break

        dram_access = [drams[dce].access_size() for dce in range(de.NUM)]
        gbuf_access = [gbufs[dce].access_size() for dce in range(de.NUM)]

        # Sum over all nodes.
        dram_access = [
            a * lbs.num_nodes // r
            for a, r in zip(dram_access, lbs.accfwd_reduction)
        ]
        gbuf_access = [a * lbs.num_nodes for a in gbuf_access]

        # Buffer sharing.
        if get_bufshr:
            # The simulated stats are per subgroup; scale by the number of
            # subgroups.
            rotation_access = [
                gbufs[dce].rotation_access_size() *
                (lbs.num_nodes // subgrp_size[dce]) for dce in range(de.NUM)
            ]
            wide_fetch_access = [
                gbufs[dce].wide_fetch_access_size() *
                (lbs.num_nodes // subgrp_size[dce]) for dce in range(de.NUM)
            ]
            rotation_rounds = [
                gbufs[dce].rotation_rounds() for dce in range(de.NUM)
            ]

            return dram_access, gbuf_access, \
                    (rotation_access, wide_fetch_access, rotation_rounds)

        # Without bufshr stats requested, there must be no bufshr activity.
        for dce in range(de.NUM):
            self.assertAlmostEqual(gbufs[dce].rotation_access_size(),
                                   0,
                                   msg='_sim_access_conv: non-0 '
                                   'rotation access with no bufshr.')
            self.assertAlmostEqual(gbufs[dce].wide_fetch_access_size(),
                                   0,
                                   msg='_sim_access_conv: non-0 '
                                   'wide fetch access with no bufshr.')
            self.assertEqual(gbufs[dce].rotation_rounds(),
                             0,
                             msg='_sim_access_conv: non-0 '
                             'rotation rounds with no bufshr.')

        return dram_access, gbuf_access
        def do_access(self, idx_pr, cnt_pr, read=1, write=0):
            '''
            Access the shared buffer by `read` and/or `write`, with the unit
            index `idx_pr` and count `cnt_pr`, of all dimensions.

            The access is first done on the underlying `self.base` buffer.
            Unless the buffer is bypassed, the rotation and wide fetch
            statistics of buffer sharing are then updated.

            Return what `self.base.do_access` returns.
            '''

            ret = self.base.do_access(idx_pr, cnt_pr, read=read, write=write)

            if self.bypass:
                # Bypass, skip buffer sharing.
                return ret

            # Range index.
            ridx_pr = self._range_idx_pr(idx_pr)

            if any(ret):
                # Miss in the shared buffer and load new range. Reset.
                self.cur_rot_unit = 0
                self.rot_step_cnt.setdefault(ridx_pr, 0)

                if self.cur_rot_step_cnt == 0:
                    # Initial fetch, no replaced data yet.
                    assert self.rot_rnd_cnt_per_load is None
                else:
                    # The range being replaced must have gone through a whole
                    # number of rotation rounds, and the same number as every
                    # range replaced before it.
                    rot_rnd_cnt_per_load, rem_ = divmod(
                        self.cur_rot_step_cnt, self.rot_steps_per_round)
                    assert rem_ == 0
                    assert self.rot_rnd_cnt_per_load is None \
                            or self.rot_rnd_cnt_per_load == rot_rnd_cnt_per_load
                    self.rot_rnd_cnt_per_load = rot_rnd_cnt_per_load
                self.cur_rot_step_cnt = 0

            # A single access must not exceed one subrange.
            assert all(cnt <= subrng_cnt
                       for cnt, subrng_cnt in zip(cnt_pr, self.subrng_cnt_pr))

            # Subrange index.
            sridx_pr = self._subrange_idx_pr(idx_pr)

            # Rotation unit index.
            ru_idx = self._subrng_rot_unit_idx(sridx_pr)

            if ru_idx != self.cur_rot_unit:
                # Move to next rotation unit.

                if (self.cur_rot_unit + 1) * self.rot_unit_size \
                        >= self.subrng_num:
                    # The current rotation unit is the last one. Start a new
                    # rotation round.
                    # Do not rotate back to the initial state. Instead start
                    # from the current state.
                    self.cur_rot_unit = 0

                    # Also restart tracking the sequential wide fetch.
                    self.last_wf_subrng_idx = 0
                    self.seq_wf_acc = 0

                elif self.cur_rot_unit * self.rot_unit_size \
                        + self.buf_subrng_num >= self.subrng_num:
                    # The last rotation unit is already local. No more rotation.
                    self.cur_rot_unit += 1

                else:
                    # Rotate by one rotation unit, but not exceeding the end.
                    offset = min(
                        self.rot_unit_size, self.subrng_num -
                        self.cur_rot_unit * self.rot_unit_size -
                        self.buf_subrng_num)
                    assert offset > 0

                    # All subranges shift by the above offset.
                    acc_ = (1. * offset /
                            self.buf_subrng_num) * self.subrng_num
                    self.rot_access += util.prod(self.subrng_cnt_pr) * acc_
                    self.cur_rot_unit += 1

                    # One rotation step.
                    self.rot_step_cnt[ridx_pr] += 1
                    self.cur_rot_step_cnt += 1

                    # Combine wide fetch with rotation: the sequential wide
                    # fetch accumulated so far is moved from the wide fetch
                    # access to the saved amount.
                    self.wf_access -= self.seq_wf_acc
                    self.saved_wf_access += self.seq_wf_acc
                    self.seq_wf_acc = 0

                # Whichever case applied, we must now be at the rotation unit
                # of the accessed subrange.
                assert ru_idx == self.cur_rot_unit

            # Buffer index of which has this subrange.
            buf_idx = self._subrng_buf_idx(sridx_pr)

            # Wide fetch from possibly remote buffer. The amount scales with
            # the buffer index, so buffer index 0 adds nothing.
            wf_acc = util.prod(cnt_pr) * (read + write) * buf_idx
            self.wf_access += wf_acc

            # Record amount of sequential wide fetch. The running amount keeps
            # growing while the subrange index does not decrease, and restarts
            # from this access otherwise.
            subrng_idx = self.subrng_idx_dict[sridx_pr]
            if subrng_idx >= self.last_wf_subrng_idx:
                self.seq_wf_acc += wf_acc
            else:
                self.seq_wf_acc = wf_acc
            self.last_wf_subrng_idx = subrng_idx

            return ret
Esempio n. 10
0
    def test_nested_loop_desc_sanity(self):
        '''
        Generated nested loop description sanity check.

        For every layer of the fixture, each nested loop description from
        `MapStrategyEyeriss` is checked against the layer for loop counts,
        ops, time, DRAM and REGF access, unit buffer sizes, and data
        dimension loops.
        '''

        batch_size = 4

        # Turn each values() into a list before concatenating: the view
        # objects returned by dict.values() on Python 3 do not support `+`.
        layers = list(self.convlayers.values()) \
                + list(self.fclayers.values()) \
                + list(self.lrlayers.values()) \
                + list(self.fake_layers.values())

        for layer in layers:

            # Only CONV layers have filters.
            is_conv = isinstance(layer, ConvLayer)

            ms = MapStrategyEyeriss(layer, batch_size, self.dim_array)

            for nld in ms.gen_nested_loop_desc():

                # Replication reduces numbers of IFM/OFM.
                self.assertGreaterEqual(layer.nifm, nld.loopcnt[le.IFM])
                self.assertGreaterEqual(layer.nofm, nld.loopcnt[le.OFM])
                # Folding increases batch size.
                self.assertEqual(nld.loopcnt[le.BAT] % batch_size, 0)

                # Total and unit ops.
                self.assertAlmostEqual(nld.total_ops(),
                                       layer.total_ops(batch_size))
                self.assertAlmostEqual(nld.unit_ops * util.prod(nld.loopcnt),
                                       layer.total_ops(batch_size))

                # Unit time and unit ops.
                # The difference is due to the loop occupation, which is not
                # counted in utilization.
                self.assertGreaterEqual(
                    nld.unit_time * ms.utilization() * self.dim_array.size(),
                    nld.unit_ops)

                # Total access at DRAM.
                self.assertAlmostEqual(
                    nld.total_access_at_of(me.DRAM, de.FIL),
                    layer.total_filter_size() if is_conv else 0)
                # IFM may have refetch due to folding.
                self.assertGreaterEqual(
                    nld.total_access_at_of(me.DRAM, de.IFM) + 1e-7,
                    layer.total_ifmap_size(batch_size))
                self.assertAlmostEqual(nld.total_access_at_of(me.DRAM, de.OFM),
                                       layer.total_ofmap_size(batch_size))

                # Unit access to REGF.
                self.assertAlmostEqual(
                    nld.unit_access[me.REGF][de.FIL] * util.prod(nld.loopcnt),
                    layer.total_ops(batch_size) if is_conv else 0)
                self.assertAlmostEqual(
                    nld.unit_access[me.REGF][de.IFM] * util.prod(nld.loopcnt),
                    layer.total_ops(batch_size))
                self.assertAlmostEqual(
                    nld.unit_access[me.REGF][de.OFM] * util.prod(nld.loopcnt),
                    layer.total_ops(batch_size))

                # Unit GBUF size and unit access to DRAM.
                self.assertTrue(
                    all(us >= ua for us, ua in zip(nld.usize_gbuf,
                                                   nld.unit_access[me.DRAM])))

                # Unit REGF size.
                if is_conv:
                    # See JSSC'17, IV. A. Dimensions Beyond 2-D in PE Array. 1).
                    self.assertEqual(nld.usize_regf[de.FIL], layer.wfil)
                    self.assertEqual(nld.usize_regf[de.IFM], layer.wfil)
                    self.assertEqual(nld.usize_regf[de.OFM], 1)

                # Data dimension loops.
                if is_conv:
                    self.assertEqual(nld.data_loops[de.FIL],
                                     DataDimLoops(le.IFM, le.OFM))
                    self.assertEqual(nld.data_loops[de.IFM],
                                     DataDimLoops(le.IFM, le.BAT))
                    self.assertEqual(nld.data_loops[de.OFM],
                                     DataDimLoops(le.OFM, le.BAT))
                else:
                    # Layers without filters. This used to be guarded by a
                    # second `isinstance(layer, ConvLayer)` test that could
                    # never be reached, so these checks never ran.
                    self.assertEqual(nld.data_loops[de.FIL], DataDimLoops())
                    self.assertEqual(nld.data_loops[de.IFM],
                                     DataDimLoops(le.OFM, le.BAT))
                    self.assertEqual(nld.data_loops[de.OFM],
                                     DataDimLoops(le.OFM, le.BAT))
    def _sim_access_conv(self, lbs):
        '''
        Get data access by actually simulating and generating loops for CONV
        layer.

        Every index generated by `lbs` is pushed through a simulated
        REGF -> GBUF -> DRAM buffer hierarchy for each data category; a level
        is only consulted when the level before it requests data.

        Return `(dram_access, gbuf_access)`, each a list of the access size
        indexed by data category.
        '''
        self.assertTrue(lbs.is_valid(), '_sim_access_conv: invalid lbs.')

        data_loops = lbs.nld.data_loops

        # Blocking factors of all levels, indexed by loop. Must be a tuple
        # since it is indexed below; a bare zip() is an iterator on Python 3.
        lpts = tuple(zip(*lbs.bl_ts))

        def _buf_cnt_pr_list(first_lvl):
            '''
            Buffered unit counts of all data categories, from the product of
            the blocking factors at `first_lvl` and all levels after it.
            '''
            return [
                tuple(util.prod(lpts[lpe][first_lvl:])
                      for lpe in data_loops[dce].loops())
                for dce in range(de.NUM)
            ]

        # Get buffered unit counts at each level.
        dram_buf_cnt_pr_list = _buf_cnt_pr_list(0)
        gbuf_buf_cnt_pr_list = _buf_cnt_pr_list(1)
        regf_buf_cnt_pr_list = _buf_cnt_pr_list(2)

        # Initialize SimBuffer.
        # DRAM uses the GBUF unit access for data not stored in GBUF.
        drams = [
            self._SimBuffer(
                dce,
                buf_cnt_pr,
                lbs.nld.unit_access[me.DRAM][dce] if lbs.stored_in_gbuf[dce]
                else lbs.nld.unit_access[me.GBUF][dce],
            )
            for dce, buf_cnt_pr in enumerate(dram_buf_cnt_pr_list)
        ]
        gbufs = [
            self._SimBuffer(
                dce,
                buf_cnt_pr,
                lbs.nld.unit_access[me.GBUF][dce],
                bypass=(not lbs.stored_in_gbuf[dce]),
            )
            for dce, buf_cnt_pr in enumerate(gbuf_buf_cnt_pr_list)
        ]
        regfs = [
            self._SimBuffer(
                dce,
                buf_cnt_pr,
                lbs.nld.unit_access[me.REGF][dce],
            )
            for dce, buf_cnt_pr in enumerate(regf_buf_cnt_pr_list)
        ]

        # Already generated psum for OFM.
        ofm_psum = set()

        # Simulation.
        for idx_tuple in lbs.gen_index():

            for dce in range(de.NUM):

                idx_pr = tuple(data_loops[dce].take(idx_tuple))

                if dce == de.OFM:
                    # Fetch and writeback, unless for the first time (no fetch).
                    write = 1
                    read = 1 if idx_pr in ofm_psum else 0
                    ofm_psum.add(idx_pr)
                else:
                    read = 1
                    write = 0

                # PE.
                cnt_pr = (1, 1)

                # REGF, then GBUF, then DRAM. Stop at the first level that
                # requests nothing from the next one.
                for bufs in (regfs, gbufs, drams):
                    cnt_pr = bufs[dce].do_access(idx_pr, cnt_pr, read, write)
                    if not any(cnt_pr):
                        break

        dram_access = [drams[dce].access_size() for dce in range(de.NUM)]
        gbuf_access = [gbufs[dce].access_size() for dce in range(de.NUM)]
        return dram_access, gbuf_access
Esempio n. 12
0
 def test_limits(self):
     ''' Factors respect their upper limits and still multiply to total. '''
     limits = (10, 20)
     for factors in util.factorize(1024, 3, limits=limits):
         # Only the leading factors are limited.
         for factor, limit in zip(factors, limits):
             self.assertLessEqual(factor, limit)
         self.assertEqual(util.prod(factors), 1024)
Esempio n. 13
0
 def test_empty(self):
     ''' The product of any empty iterable is 1. '''
     for empty in ([], tuple(), set()):
         self.assertEqual(util.prod(empty), 1)
Esempio n. 14
0
 def test_float(self):
     ''' Products involving floats, compared approximately. '''
     cases = (
         ([1.1, 2, 3], 6.6),
         ([1.1, 2, -3.], -6.6),
     )
     for factors, expected in cases:
         self.assertAlmostEqual(util.prod(factors), expected)