def _total_part_size(self, part, layerkey='PAR'):
        '''
        Get the total partitioned data size.

        Each partitioned dimension of the layer `self.layer[layerkey]` (and
        the batch size) is rounded up to a multiple of the corresponding
        partitioning factor of `part`, and the total sizes of the resulting
        layer are returned as `(filter_size, ifmap_size, ofmap_size)`.

        Also asserts that every rounded-up size is no smaller than the
        size of the original layer, and that the filter and ofmap sizes stay
        below the original sizes scaled by 1.2 per rounded dimension.
        '''

        def _round_up(count, factor):
            # Smallest multiple of `factor` that is no less than `count`.
            return util.idivc(count, factor) * factor

        orig_layer = self.layer[layerkey]
        ofm_pdim = part.dim(pe.OFMP)

        padded_nifm = _round_up(orig_layer.nifm, part.size(pe.INPP))
        padded_nofm = _round_up(orig_layer.nofm, part.size(pe.OUTP))
        padded_hofm = _round_up(orig_layer.hofm, ofm_pdim.h)
        padded_wofm = _round_up(orig_layer.wofm, ofm_pdim.w)
        padded_batch = _round_up(self.batch_size, part.size(pe.BATP))

        # Filter shape and stride are not affected by the partitioning.
        padded_layer = ConvLayer(padded_nifm, padded_nofm,
                                 (padded_hofm, padded_wofm),
                                 (orig_layer.hfil, orig_layer.wfil),
                                 (orig_layer.htrd, orig_layer.wtrd))
        filter_size = padded_layer.total_filter_size()
        ifmap_size = padded_layer.total_ifmap_size(padded_batch)
        ofmap_size = padded_layer.total_ofmap_size(padded_batch)

        # Filters: bounded below by the original, above by two 1.2 factors.
        self.assertGreaterEqual(filter_size, orig_layer.total_filter_size())
        self.assertLess(filter_size,
                        orig_layer.total_filter_size() * 1.2 * 1.2)

        # Ofmaps: bounded below by the original, above by three 1.2 factors.
        self.assertGreaterEqual(
            ofmap_size, orig_layer.total_ofmap_size(self.batch_size))
        self.assertLess(
            ofmap_size,
            orig_layer.total_ofmap_size(self.batch_size) * 1.2 * 1.2 * 1.2)

        # Ifmaps: only the lower bound is checked.
        self.assertGreaterEqual(
            ifmap_size, orig_layer.total_ifmap_size(self.batch_size))

        return filter_size, ifmap_size, ofmap_size
    def test_bufshr_skip_rot_example(self):
        '''
        Example scheme using bufshr that skips the single rotation.

        Builds a partitioning whose IFM bufshr size is 4 and a loop blocking
        scheme whose IFM bufshr subgroup size is 4, wide fetch width is
        greater than 1 and rotation round count is 0. These preconditions
        are asserted before the simulated access statistics are verified.
        '''
        part_err = ('test_bufshr_skip_rot_example: '
                    'made-up PartitionScheme is not expected: ')
        lbs_err = ('test_bufshr_skip_rot_example: '
                   'made-up LoopBlockingScheme is not expected: ')

        # A PartitionScheme that allows bufshr for IFM.
        part = PartitionScheme(order=range(pe.NUM),
                               pdims=((2, 2), (1, 1), (2, 1), (1, 1)))
        bufshr = BufShrScheme(self.par_proc_region, part)
        ifm_bufshr_size = bufshr.size(de.IFM)
        self.assertEqual(
            ifm_bufshr_size, 4,
            part_err + '{}, bufshr size for {} {}.'.format(
                part, de.IFM, ifm_bufshr_size))

        # A LoopBlockingScheme that has a single rotation for IFM.
        p_nld = self._part_nld(part)
        top_ts = (util.idivc(p_nld.loopcnt[le.IFM], 3),
                  util.idivc(p_nld.loopcnt[le.OFM], 3),
                  util.idivc(p_nld.loopcnt[le.BAT], 2))
        bl_ts = (top_ts, (1, 1, 2), (3, 3, 1))
        natural_ord = tuple(range(le.NUM))
        bl_ords = (natural_ord, natural_ord)
        scheme = (bl_ts, bl_ords)
        lbs = LoopBlockingScheme(p_nld, bl_ts, bl_ords, self.resource['PAR'],
                                 bufshr, self.options['BUFSHR'])
        self.assertTrue(lbs.is_valid())
        self.assertGreater(sum(lbs.get_noc_access()), 0)

        subgrp_size = lbs.bufshr_subgrp_size[de.IFM]
        self.assertEqual(
            subgrp_size, 4,
            lbs_err + '{}, bufshr subgrp size for {} {}.'.format(
                scheme, de.IFM, subgrp_size))

        fetch_width = lbs.bufshr_wide_fetch_width[de.IFM]
        self.assertGreater(
            fetch_width, 1,
            lbs_err + '{}, bufshr wide fetch width for {} {}.'.format(
                scheme, de.IFM, fetch_width))

        rot_rounds = lbs.bufshr_rot_round_cnt[de.IFM]
        self.assertEqual(
            rot_rounds, 0,
            lbs_err + '{}, bufshr rotation rounds for {} {}'.format(
                scheme, de.IFM, rot_rounds))

        # Simulate and cross-check the statistics.
        sim_result = self._sim_access_conv(lbs, get_bufshr=True)
        dram_access, gbuf_access, bufshr_stats = sim_result

        self._verify_bufshr_stats(dram_access, gbuf_access, bufshr_stats, lbs,
                                  bufshr, 'test_bufshr_skip_rot_example')
 def _make_bl_ts(self, ti_part, to_part, tb_part, wlkey='BASE'):
     '''
     Make a set of blocking factors.

     `ti_part`, `to_part` and `tb_part` are the factor sequences for the
     `le.IFM`, `le.OFM` and `le.BAT` loops respectively. Each can contain
     one 0 value to be filled: the 0 is replaced by the loop count of
     `self.nld[wlkey]` for that loop divided, rounding up via `util.idivc`,
     by the product of the other entries. A sequence without a 0 is used
     as given.

     Returns a tuple built by zipping the three per-loop sequences, i.e. one
     tuple of per-loop factors for each position in the sequences.
     '''

     def _fill(factors, lpe):
         # Replace the 0 placeholder in `factors` (if any) for loop `lpe`.
         try:
             hole = factors.index(0)
         except ValueError:
             # Nothing to fill.
             return factors
         others = factors[:hole] + factors[hole + 1:]
         filled = list(factors)
         # The workload is only looked up when a placeholder is present.
         filled[hole] = util.idivc(self.nld[wlkey].loopcnt[lpe],
                                   util.prod(others))
         return filled

     lp_ts = [None] * le.NUM
     lp_ts[le.IFM] = _fill(ti_part, le.IFM)
     lp_ts[le.OFM] = _fill(to_part, le.OFM)
     lp_ts[le.BAT] = _fill(tb_part, le.BAT)
     return tuple(zip(*lp_ts))
    def test_bufshr_multisubgrp_example(self):
        '''
        Example scheme using bufshr with multiple subgroups in a group.

        Builds a partitioning whose IFM bufshr size is 4 and a loop blocking
        scheme whose IFM bufshr group size exceeds its subgroup size and
        whose IFM rotation round count is positive. These preconditions are
        asserted before the simulated access statistics are verified.
        '''
        part_err = ('test_bufshr_multisubgrp_example: '
                    'made-up PartitionScheme is not expected: ')
        lbs_err = ('test_bufshr_multisubgrp_example: '
                   'made-up LoopBlockingScheme is not expected: ')

        # A PartitionScheme that allows bufshr for IFM.
        part = PartitionScheme(order=list(range(pe.NUM))[::-1],
                               pdims=((2, 2), (1, 1), (2, 1), (1, 1)))
        bufshr = BufShrScheme(self.par_proc_region, part)
        ifm_bufshr_size = bufshr.size(de.IFM)
        self.assertEqual(
            ifm_bufshr_size, 4,
            part_err + '{}, bufshr size for {} {}.'.format(
                part, de.IFM, ifm_bufshr_size))

        # A LoopBlockingScheme that has multi subgroups per group for IFM.
        p_nld = self._part_nld(part)
        top_ts = (util.idivc(p_nld.loopcnt[le.IFM], 1),
                  util.idivc(p_nld.loopcnt[le.OFM], 3),
                  util.idivc(p_nld.loopcnt[le.BAT], 2))
        bl_ts = (top_ts, (1, 3, 2), (1, 1, 1))
        # At GBUF level, from inner to outer: le.BAT, le.OFM, le.IFM.
        bl_ords = (tuple(range(le.NUM)), (2, 1, 0))
        scheme = (bl_ts, bl_ords)
        lbs = LoopBlockingScheme(p_nld, bl_ts, bl_ords, self.resource['PAR'],
                                 bufshr, self.options['BUFSHR'])
        self.assertTrue(lbs.is_valid())
        self.assertGreater(sum(lbs.get_noc_access()), 0)

        grp_size = lbs.bufshr_grp_size[de.IFM]
        subgrp_size = lbs.bufshr_subgrp_size[de.IFM]
        self.assertGreater(
            grp_size, subgrp_size,
            lbs_err + '{}, bufshr grp size {}, bufshr subgrp size {}'.format(
                scheme, lbs.bufshr_grp_size, lbs.bufshr_subgrp_size))

        rot_rounds = lbs.bufshr_rot_round_cnt[de.IFM]
        self.assertGreater(
            rot_rounds, 0,
            lbs_err + '{}, bufshr rotation rounds for {} {}'.format(
                scheme, de.IFM, rot_rounds))

        # Simulate and cross-check the statistics.
        sim_result = self._sim_access_conv(lbs, get_bufshr=True)
        dram_access, gbuf_access, bufshr_stats = sim_result

        self._verify_bufshr_stats(dram_access, gbuf_access, bufshr_stats, lbs,
                                  bufshr, 'test_bufshr_multisubgrp_example')
예제 #5
0
    def test_int(self):
        ''' approx_dividable() with integer arguments. '''
        # Exact divisors of 24 are accepted even with zero overhead allowed.
        for divisor in (2, 3, 4):
            self.assertTrue(
                util.approx_dividable(24, divisor, overhead=0))

        # Inexact divisions under the default overhead.
        self.assertTrue(util.approx_dividable(7, 2))
        self.assertTrue(util.approx_dividable(19, 7))
        self.assertFalse(util.approx_dividable(22, 7))

        # Relative excess of rounding 19 up to a multiple of 7. The check is
        # expected to flip between just below and just above this value.
        padding = util.idivc(19, 7) * 7 / 19. - 1
        below = util.approx_dividable(19, 7, overhead=padding - 0.01)
        self.assertFalse(below)
        above = util.approx_dividable(19, 7, overhead=padding + 0.01)
        self.assertTrue(above)
    def test_bufshr_rotation_example(self):
        '''
        Example scheme using bufshr with rotation.

        Builds a partitioning whose bufshr size is greater than 1 for every
        data category, and a loop blocking scheme whose bufshr subgroup
        sizes and top-level blocking factors are all greater than 1. These
        preconditions are asserted before the simulated access statistics
        are verified.
        '''
        part_err = ('test_bufshr_rotation_example: '
                    'made-up PartitionScheme is not expected: ')
        lbs_err = ('test_bufshr_rotation_example: '
                   'made-up LoopBlockingScheme is not expected: ')

        # A PartitionScheme that allows bufshr for all data categories.
        part = PartitionScheme(order=range(pe.NUM),
                               pdims=((2, 1), (1, 2), (1, 1), (2, 1)))
        bufshr = BufShrScheme(self.par_proc_region, part)
        bufshr_sizes = [bufshr.size(dce) for dce in range(de.NUM)]
        self.assertTrue(
            all(size > 1 for size in bufshr_sizes),
            part_err + '{}, bufshr size {}'.format(part, bufshr_sizes))

        # A LoopBlockingScheme that uses bufshr for all data categories.
        p_nld = self._part_nld(part)
        top_ts = (util.idivc(p_nld.loopcnt[le.IFM], 6),
                  util.idivc(p_nld.loopcnt[le.OFM], 9),
                  util.idivc(p_nld.loopcnt[le.BAT], 2))
        bl_ts = (top_ts, (3, 3, 2), (2, 3, 1))
        natural_ord = tuple(range(le.NUM))
        bl_ords = (natural_ord, natural_ord)
        lbs = LoopBlockingScheme(p_nld, bl_ts, bl_ords, self.resource['PAR'],
                                 bufshr, self.options['BUFSHR'])
        self.assertTrue(lbs.is_valid())
        self.assertGreater(sum(lbs.get_noc_access()), 0)

        all_shared = all(sgs > 1 for sgs in lbs.bufshr_subgrp_size)
        all_blocked = all(t > 1 for t in bl_ts[0])
        self.assertTrue(
            all_shared and all_blocked,
            lbs_err + '{}, top factors {}, bufshr subgrp size {}'.format(
                (bl_ts, bl_ords), bl_ts[0], lbs.bufshr_subgrp_size))

        # Simulate and cross-check the statistics.
        sim_result = self._sim_access_conv(lbs, get_bufshr=True)
        dram_access, gbuf_access, bufshr_stats = sim_result

        self._verify_bufshr_stats(dram_access, gbuf_access, bufshr_stats, lbs,
                                  bufshr, 'test_bufshr_rotation_example')
    def test_nested_loop_desc_fold_w(self):
        '''
        Generated nested loop description when folding width.

        Maps layer 'conv1' with batch size 4 and expects no replication, no
        height folding and a width folding factor greater than 1. The single
        generated nested loop description must then match a reference layer
        whose ofmap height is divided (rounding up) by the folding factor and
        whose batch size is multiplied by it.
        '''
        batch_size = 4
        occ = 1
        layer = self.convlayers['conv1']

        ms = MapStrategyEyeriss(layer, batch_size, occ, self.dim_array)

        self.assertTupleEqual(ms.repl, (1, 1))
        self.assertEqual(ms.fold.h, 1)
        self.assertGreater(ms.fold.w, 1)

        # Only 1 possible nld.
        nlds = list(ms.gen_nested_loop_desc())
        self.assertEqual(len(nlds), 1)
        nld = nlds[0]

        # Fold to batch size.
        fold_w = ms.fold.w
        folded_sofm = (util.idivc(layer.hofm, fold_w), layer.wofm)
        folded_layer = ConvLayer(layer.nifm, layer.nofm, folded_sofm,
                                 (layer.hfil, layer.wfil),
                                 strd=(layer.htrd, layer.wtrd))
        folded_batch_size = batch_size * fold_w

        # The folded workload has at least as many ops as the original one.
        orig_ops = layer.total_ops(batch_size)
        folded_ops = folded_layer.total_ops(folded_batch_size)
        locc = orig_ops / folded_ops
        self.assertLessEqual(locc, 1)

        # Loop counts follow the folded layer and batch size.
        self.assertEqual(nld.loopcnt[le.IFM], folded_layer.nifm)
        self.assertEqual(nld.loopcnt[le.OFM], folded_layer.nofm)
        self.assertEqual(nld.loopcnt[le.BAT], folded_batch_size)

        # GBUF unit sizes follow the folded layer. Each size getter is only
        # called right before its own check.
        expected_usize = ((de.FIL, folded_layer.filter_size),
                          (de.IFM, folded_layer.ifmap_size),
                          (de.OFM, folded_layer.ofmap_size))
        for dce, size_of in expected_usize:
            self.assertEqual(nld.usize_gbuf[dce], size_of())

        # DRAM and GBUF accesses are equal.
        dram_unit_access = nld.unit_access[me.DRAM]
        gbuf_unit_access = nld.unit_access[me.GBUF]
        self.assertTupleEqual(dram_unit_access, gbuf_unit_access)
예제 #8
0
 def test_negative(self):
     ''' idivc() with negative operands. '''
     # ((dividend, divisor), expected result).
     cases = (
         ((34, 4), 9),
         ((-34, 4), -8),
         ((34, -4), -8),
         ((-34, -4), 9),
     )
     for (dividend, divisor), expected in cases:
         quotient = util.idivc(dividend, divisor)
         self.assertEqual(quotient, expected, 'idivc: negative')
예제 #9
0
 def test_int(self):
     ''' idivc() with integer operands. '''
     # (divisor, expected result) for a fixed dividend of 8.
     for divisor, expected in ((3, 3), (2, 4), (1, 8)):
         self.assertEqual(util.idivc(8, divisor), expected)
예제 #10
0
 def test_inf(self):
     ''' idivc() with infinite operands. '''
     inf = float('inf')

     # A finite value divided by infinity gives 0.
     finite_by_inf = util.idivc(3, inf)
     self.assertEqual(finite_by_inf, 0, 'idivc: inf')

     # Infinity divided by infinity gives NaN.
     inf_by_inf = util.idivc(inf, inf)
     self.assertTrue(math.isnan(inf_by_inf), 'idivc: inf')
예제 #11
0
 def test_float(self):
     ''' idivc() with floating-point operands. '''
     # (dividend, divisor, expected result).
     cases = (
         (4.3, 3, 2),
         (34.3, 3, 12),
         (34, 3., 12),
     )
     for dividend, divisor, expected in cases:
         self.assertAlmostEqual(util.idivc(dividend, divisor), expected)
예제 #12
0
 def test_zero(self):
     ''' idivc() with a zero operand. '''
     # A zero dividend gives 0.
     zero_dividend = util.idivc(0, 3)
     self.assertEqual(zero_dividend, 0, 'idivc: zero')

     # A zero divisor raises.
     self.assertRaises(ZeroDivisionError, util.idivc, 3, 0)