def _total_part_size(self, part, layerkey='PAR'):
    '''
    Get the total partitioned data size.

    Each partitioned layer dimension, and the batch size, is padded up to a
    multiple of its partitioning factor. The data sizes are then taken from a
    layer built with the padded dimensions, and sanity-checked against the
    sizes of the unpadded layer.

    Return a tuple of (filter size, ifmap size, ofmap size).
    '''
    def _pad(value, factor):
        ''' Pad `value` up to a multiple of `factor`. '''
        return util.idivc(value, factor) * factor

    layer = self.layer[layerkey]

    padded_nifm = _pad(layer.nifm, part.size(pe.INPP))
    padded_nofm = _pad(layer.nofm, part.size(pe.OUTP))
    ofm_pdim = part.dim(pe.OFMP)
    padded_hofm = _pad(layer.hofm, ofm_pdim.h)
    padded_wofm = _pad(layer.wofm, ofm_pdim.w)
    padded_batch = _pad(self.batch_size, part.size(pe.BATP))

    padded_layer = ConvLayer(padded_nifm, padded_nofm,
                             (padded_hofm, padded_wofm),
                             (layer.hfil, layer.wfil),
                             (layer.htrd, layer.wtrd))

    sizes = (padded_layer.total_filter_size(),
             padded_layer.total_ifmap_size(padded_batch),
             padded_layer.total_ofmap_size(padded_batch))
    fil_size, ifm_size, ofm_size = sizes

    # Padding never shrinks the data, and for filters and ofmaps it must stay
    # below a factor of 1.2 for each of the padded dimensions involved.
    self.assertGreaterEqual(fil_size, layer.total_filter_size())
    self.assertLess(fil_size, layer.total_filter_size() * 1.2 * 1.2)
    self.assertGreaterEqual(ofm_size,
                            layer.total_ofmap_size(self.batch_size))
    self.assertLess(
        ofm_size,
        layer.total_ofmap_size(self.batch_size) * 1.2 * 1.2 * 1.2)
    # Ifmaps are only checked against the lower bound.
    self.assertGreaterEqual(ifm_size,
                            layer.total_ifmap_size(self.batch_size))

    return sizes
def test_bufshr_skip_rot_example(self):
    '''
    Example scheme using bufshr that skips the single rotation.

    Builds a made-up partitioning and loop blocking scheme, checks that they
    have the intended bufshr properties for IFM, then simulates the accesses
    and verifies the bufshr statistics.
    '''
    # Make a PartitionScheme that allows bufshr for IFM.
    part = PartitionScheme(order=range(pe.NUM),
                           pdims=((2, 2), (1, 1), (2, 1), (1, 1)))
    bufshr = BufShrScheme(self.par_proc_region, part)
    ifm_shr_size = bufshr.size(de.IFM)
    self.assertEqual(
        ifm_shr_size, 4,
        'test_bufshr_skip_rot_example: '
        'made-up PartitionScheme is not expected: '
        '{}, bufshr size for {} {}.'.format(part, de.IFM,
                                            bufshr.size(de.IFM)))

    # Make a LoopBlockingScheme that has a single rotation for IFM.
    p_nld = self._part_nld(part)
    top_ts = (util.idivc(p_nld.loopcnt[le.IFM], 3),
              util.idivc(p_nld.loopcnt[le.OFM], 3),
              util.idivc(p_nld.loopcnt[le.BAT], 2))
    bl_ts = (top_ts, (1, 1, 2), (3, 3, 1))
    natural_ord = tuple(range(le.NUM))
    bl_ords = (natural_ord, natural_ord)
    lbs = LoopBlockingScheme(p_nld, bl_ts, bl_ords, self.resource['PAR'],
                             bufshr, self.options['BUFSHR'])
    self.assertTrue(lbs.is_valid())
    noc_access_total = sum(lbs.get_noc_access())
    self.assertGreater(noc_access_total, 0)

    # The whole bufshr group of 4 acts as one subgroup for IFM.
    ifm_subgrp_size = lbs.bufshr_subgrp_size[de.IFM]
    self.assertEqual(
        ifm_subgrp_size, 4,
        'test_bufshr_skip_rot_example: '
        'made-up LoopBlockingScheme is not expected: '
        '{}, bufshr subgrp size for {} {}.'.format(
            (bl_ts, bl_ords), de.IFM, lbs.bufshr_subgrp_size[de.IFM]))

    # IFM uses wide fetch.
    ifm_wide_fetch_width = lbs.bufshr_wide_fetch_width[de.IFM]
    self.assertGreater(
        ifm_wide_fetch_width, 1,
        'test_bufshr_skip_rot_example: '
        'made-up LoopBlockingScheme is not expected: '
        '{}, bufshr wide fetch width for {} {}.'.format(
            (bl_ts, bl_ords), de.IFM,
            lbs.bufshr_wide_fetch_width[de.IFM]))

    # The rotation is skipped, i.e., no rotation rounds for IFM.
    ifm_rot_rounds = lbs.bufshr_rot_round_cnt[de.IFM]
    self.assertEqual(
        ifm_rot_rounds, 0,
        'test_bufshr_skip_rot_example: '
        'made-up LoopBlockingScheme is not expected: '
        '{}, bufshr rotation rounds for {} {}'.format(
            (bl_ts, bl_ords), de.IFM, lbs.bufshr_rot_round_cnt[de.IFM]))

    # Sim.
    sim_dram, sim_gbuf, sim_bufshr = \
        self._sim_access_conv(lbs, get_bufshr=True)

    self._verify_bufshr_stats(sim_dram, sim_gbuf, sim_bufshr,
                              lbs, bufshr,
                              'test_bufshr_skip_rot_example')
def _make_bl_ts(self, ti_part, to_part, tb_part, wlkey='BASE'):
    '''
    Make a set of blocking factors.

    `ti_part`, `to_part`, `tb_part` are the factors for the IFM, OFM and BAT
    loops respectively. Each can contain one 0 value to be filled: the 0 is
    replaced by the loop count of that loop in `self.nld[wlkey]` divided (by
    `util.idivc`) by the product of the other factors.

    Return the per-loop factors transposed, i.e., a tuple whose entries group
    the factors found at the same position across the loops.
    '''
    def _fill(factors, lpe):
        ''' Fill the 0 value in `factors`, if any, for loop `lpe`. '''
        try:
            hole = factors.index(0)
        except ValueError:
            # Nothing to fill; the factors are used as given.
            return factors
        others = factors[:hole] + factors[hole + 1:]
        # The workload is only looked up when there is a value to fill.
        filled = util.idivc(self.nld[wlkey].loopcnt[lpe], util.prod(others))
        return [filled if pos == hole else factors[pos]
                for pos in range(3)]

    per_loop = [(lpe, _fill(factors, lpe))
                for lpe, factors in ((le.IFM, ti_part),
                                     (le.OFM, to_part),
                                     (le.BAT, tb_part))]

    lp_ts = [None] * le.NUM
    for lpe, factors in per_loop:
        lp_ts[lpe] = factors

    return tuple(zip(*lp_ts))
def test_bufshr_multisubgrp_example(self):
    '''
    Example scheme using bufshr with multiple subgroups in a group.

    Builds a made-up partitioning and loop blocking scheme, checks that the
    IFM bufshr group is larger than its subgroup and that rotation happens,
    then simulates the accesses and verifies the bufshr statistics.
    '''
    # Make a PartitionScheme that allows bufshr for IFM.
    reversed_order = list(reversed(range(pe.NUM)))
    part = PartitionScheme(order=reversed_order,
                           pdims=((2, 2), (1, 1), (2, 1), (1, 1)))
    bufshr = BufShrScheme(self.par_proc_region, part)
    ifm_shr_size = bufshr.size(de.IFM)
    self.assertEqual(
        ifm_shr_size, 4,
        'test_bufshr_multisubgrp_example: '
        'made-up PartitionScheme is not expected: '
        '{}, bufshr size for {} {}.'.format(part, de.IFM,
                                            bufshr.size(de.IFM)))

    # Make a LoopBlockingScheme that has multi subgroups per group for IFM.
    p_nld = self._part_nld(part)
    top_ts = (util.idivc(p_nld.loopcnt[le.IFM], 1),
              util.idivc(p_nld.loopcnt[le.OFM], 3),
              util.idivc(p_nld.loopcnt[le.BAT], 2))
    bl_ts = (top_ts, (1, 3, 2), (1, 1, 1))
    # At GBUF level, from inner to outer: le.BAT, le.OFM, le.IFM.
    bl_ords = (tuple(range(le.NUM)), (2, 1, 0))
    lbs = LoopBlockingScheme(p_nld, bl_ts, bl_ords, self.resource['PAR'],
                             bufshr, self.options['BUFSHR'])
    self.assertTrue(lbs.is_valid())
    noc_access_total = sum(lbs.get_noc_access())
    self.assertGreater(noc_access_total, 0)

    # The IFM group must hold more than one subgroup.
    ifm_grp_size = lbs.bufshr_grp_size[de.IFM]
    ifm_subgrp_size = lbs.bufshr_subgrp_size[de.IFM]
    self.assertGreater(
        ifm_grp_size, ifm_subgrp_size,
        'test_bufshr_multisubgrp_example: '
        'made-up LoopBlockingScheme is not expected: '
        '{}, bufshr grp size {}, bufshr subgrp size {}'.format(
            (bl_ts, bl_ords), lbs.bufshr_grp_size,
            lbs.bufshr_subgrp_size))

    # Rotation must take place for IFM.
    ifm_rot_rounds = lbs.bufshr_rot_round_cnt[de.IFM]
    self.assertGreater(
        ifm_rot_rounds, 0,
        'test_bufshr_multisubgrp_example: '
        'made-up LoopBlockingScheme is not expected: '
        '{}, bufshr rotation rounds for {} {}'.format(
            (bl_ts, bl_ords), de.IFM, lbs.bufshr_rot_round_cnt[de.IFM]))

    # Sim.
    sim_dram, sim_gbuf, sim_bufshr = \
        self._sim_access_conv(lbs, get_bufshr=True)

    self._verify_bufshr_stats(sim_dram, sim_gbuf, sim_bufshr,
                              lbs, bufshr,
                              'test_bufshr_multisubgrp_example')
def test_int(self):
    '''
    Int.

    Checks `util.approx_dividable` with integer arguments: exact division
    with zero overhead, the default overhead, and an explicit overhead just
    below and just above the actual padding overhead.
    '''
    # Exact division is accepted even when no overhead is allowed.
    for divisor in (2, 3, 4):
        self.assertTrue(util.approx_dividable(24, divisor, overhead=0))

    # Inexact division with the default overhead.
    self.assertTrue(util.approx_dividable(7, 2))
    self.assertTrue(util.approx_dividable(19, 7))
    self.assertFalse(util.approx_dividable(22, 7))

    # Relative overhead of padding 19 up to a multiple of 7.
    padding_ovhd = util.idivc(19, 7) * 7 / 19. - 1
    too_tight = padding_ovhd - 0.01
    loose_enough = padding_ovhd + 0.01
    self.assertFalse(util.approx_dividable(19, 7, overhead=too_tight))
    self.assertTrue(util.approx_dividable(19, 7, overhead=loose_enough))
def test_bufshr_rotation_example(self):
    '''
    Example scheme using bufshr with rotation.

    Builds a made-up partitioning and loop blocking scheme that use bufshr
    for every data category, then simulates the accesses and verifies the
    bufshr statistics.
    '''
    # Make a PartitionScheme that allows bufshr for all data categories.
    part = PartitionScheme(order=range(pe.NUM),
                           pdims=((2, 1), (1, 2), (1, 1), (2, 1)))
    bufshr = BufShrScheme(self.par_proc_region, part)
    shr_sizes = [bufshr.size(dce) for dce in range(de.NUM)]
    self.assertTrue(
        all(size > 1 for size in shr_sizes),
        'test_bufshr_rotation_example: '
        'made-up PartitionScheme is not expected: '
        '{}, bufshr size {}'.format(
            part, [bufshr.size(dce) for dce in range(de.NUM)]))

    # Make a LoopBlockingScheme that uses bufshr for all data categories.
    p_nld = self._part_nld(part)
    top_ts = (util.idivc(p_nld.loopcnt[le.IFM], 6),
              util.idivc(p_nld.loopcnt[le.OFM], 9),
              util.idivc(p_nld.loopcnt[le.BAT], 2))
    bl_ts = (top_ts, (3, 3, 2), (2, 3, 1))
    natural_ord = tuple(range(le.NUM))
    bl_ords = (natural_ord, natural_ord)
    lbs = LoopBlockingScheme(p_nld, bl_ts, bl_ords, self.resource['PAR'],
                             bufshr, self.options['BUFSHR'])
    self.assertTrue(lbs.is_valid())
    noc_access_total = sum(lbs.get_noc_access())
    self.assertGreater(noc_access_total, 0)

    # Every subgroup has more than one member and every top-level factor is
    # non-trivial.
    subgrps_shared = all(sgs > 1 for sgs in lbs.bufshr_subgrp_size)
    self.assertTrue(
        subgrps_shared and all(t > 1 for t in bl_ts[0]),
        'test_bufshr_rotation_example: '
        'made-up LoopBlockingScheme is not expected: '
        '{}, top factors {}, bufshr subgrp size {}'.format(
            (bl_ts, bl_ords), bl_ts[0], lbs.bufshr_subgrp_size))

    # Sim.
    sim_dram, sim_gbuf, sim_bufshr = \
        self._sim_access_conv(lbs, get_bufshr=True)

    self._verify_bufshr_stats(sim_dram, sim_gbuf, sim_bufshr,
                              lbs, bufshr,
                              'test_bufshr_rotation_example')
def test_nested_loop_desc_fold_w(self):
    '''
    Generated nested loop description when folding width.

    Maps layer 'conv1' with batch size 4, expects folding only along width,
    and checks the single generated nested loop description against a layer
    whose ofmap height is divided by the fold factor and whose batch size is
    multiplied by it.
    '''
    layer = self.convlayers['conv1']
    batch_size = 4
    occ = 1

    ms = MapStrategyEyeriss(layer, batch_size, occ, self.dim_array)

    # No replication; folding along width only.
    self.assertTupleEqual(ms.repl, (1, 1))
    self.assertEqual(ms.fold.h, 1)
    self.assertGreater(ms.fold.w, 1)

    # Only 1 possible nld.
    nld_list = list(ms.gen_nested_loop_desc())
    self.assertEqual(len(nld_list), 1)
    nld = nld_list[0]

    # Fold to batch size.
    fold_w = ms.fold.w
    folded_sofm = (util.idivc(layer.hofm, fold_w), layer.wofm)
    folded_layer = ConvLayer(layer.nifm, layer.nofm,
                             folded_sofm,
                             (layer.hfil, layer.wfil),
                             strd=(layer.htrd, layer.wtrd))
    folded_batch_size = batch_size * fold_w

    # The folded workload has at least as many ops as the original one.
    orig_ops = layer.total_ops(batch_size)
    folded_ops = folded_layer.total_ops(folded_batch_size)
    locc = orig_ops / folded_ops
    self.assertLessEqual(locc, 1)

    # Loop counts follow the folded layer and batch size.
    for lpe, expected_cnt in ((le.IFM, folded_layer.nifm),
                              (le.OFM, folded_layer.nofm),
                              (le.BAT, folded_batch_size)):
        self.assertEqual(nld.loopcnt[lpe], expected_cnt)

    # Unit GBUF sizes follow the folded layer. The size methods are called
    # one by one, right before each comparison.
    for dce, size_of in ((de.FIL, folded_layer.filter_size),
                         (de.IFM, folded_layer.ifmap_size),
                         (de.OFM, folded_layer.ofmap_size)):
        self.assertEqual(nld.usize_gbuf[dce], size_of())

    # DRAM and GBUF accesses are equal.
    dram_unit_access = nld.unit_access[me.DRAM]
    gbuf_unit_access = nld.unit_access[me.GBUF]
    self.assertTupleEqual(dram_unit_access, gbuf_unit_access)
def test_negative(self):
    '''
    Negative.

    Checks `util.idivc` for every sign combination of 34 and 4.
    '''
    cases = (
        # (dividend, divisor, expected)
        (34, 4, 9),
        (-34, 4, -8),
        (34, -4, -8),
        (-34, -4, 9),
    )
    for dividend, divisor, expected in cases:
        self.assertEqual(util.idivc(dividend, divisor), expected,
                         'idivc: negative')
def test_int(self):
    '''
    Int.

    Checks `util.idivc` with integer arguments, with and without a
    remainder.
    '''
    for divisor, expected in ((3, 3), (2, 4), (1, 8)):
        self.assertEqual(util.idivc(8, divisor), expected)
def test_inf(self):
    '''
    Inf.

    Checks `util.idivc` with an infinite divisor, and with both arguments
    infinite.
    '''
    inf = float('inf')

    finite_by_inf = util.idivc(3, inf)
    self.assertEqual(finite_by_inf, 0, 'idivc: inf')

    inf_by_inf = util.idivc(inf, inf)
    self.assertTrue(math.isnan(inf_by_inf), 'idivc: inf')
def test_float(self):
    '''
    Float.

    Checks `util.idivc` when either argument is a float.
    '''
    cases = (
        # (dividend, divisor, expected)
        (4.3, 3, 2),
        (34.3, 3, 12),
        (34, 3., 12),
    )
    for dividend, divisor, expected in cases:
        self.assertAlmostEqual(util.idivc(dividend, divisor), expected)
def test_zero(self):
    '''
    Zero.

    Checks `util.idivc` with a zero dividend, and that a zero divisor raises
    `ZeroDivisionError`.
    '''
    zero_dividend = util.idivc(0, 3)
    self.assertEqual(zero_dividend, 0, 'idivc: zero')

    # Dividing by zero must raise rather than return a value.
    self.assertRaises(ZeroDivisionError, util.idivc, 3, 0)