def test_bufshr_wide_fetch_example(self):
    ''' Made-up scheme in which bufshr uses wide fetch for IFM. '''

    # Partitioning picked so that the bufshr size for IFM is 4; the check
    # right below guards these made-up numbers.
    part = PartitionScheme(order=range(pe.NUM),
                           pdims=((2, 2), (1, 1), (2, 1), (1, 1)))
    bufshr = BufShrScheme(self.par_proc_region, part)
    part_msg = ('test_bufshr_wide_fetch_example: '
                'made-up PartitionScheme is not expected: '
                '{}, bufshr size for {} {}.'
                .format(part, de.IFM, bufshr.size(de.IFM)))
    self.assertEqual(bufshr.size(de.IFM), 4, part_msg)

    # Two made-up pairs of non-top blocking factors; each one is expected
    # to yield wide fetch for IFM.
    for ts_1, ts_2 in (((3, 3, 1), (1, 1, 2)), ((1, 3, 2), (3, 1, 1))):
        p_nld = self._part_nld(part)
        # Top factors follow from the loop counts and the product of the
        # two given factors, via util.idivc.
        top_ts = tuple(
            util.idivc(p_nld.loopcnt[lpe], ts_1[lpe] * ts_2[lpe])
            for lpe in range(le.NUM))
        bl_ts = (top_ts, ts_1, ts_2)
        # At GBUF level, from inner to outer: le.BAT, le.IFM, le.OFM.
        bl_ords = (tuple(range(le.NUM)), (1, 2, 0))

        lbs = LoopBlockingScheme(p_nld, bl_ts, bl_ords,
                                 self.resource['PAR'], bufshr,
                                 self.options['BUFSHR'])
        self.assertTrue(lbs.is_valid())
        self.assertGreater(sum(lbs.get_noc_access()), 0)

        # Guard the made-up blocking: the subgroup must span all 4
        # buffers, the fetch must be wide, and rotation must happen.
        blocking = (bl_ts, bl_ords)

        subgrp_msg = ('test_bufshr_wide_fetch_example: '
                      'made-up LoopBlockingScheme is not expected: '
                      '{}, bufshr subgrp size for {} {}.'
                      .format(blocking, de.IFM,
                              lbs.bufshr_subgrp_size[de.IFM]))
        self.assertEqual(lbs.bufshr_subgrp_size[de.IFM], 4, subgrp_msg)

        width_msg = ('test_bufshr_wide_fetch_example: '
                     'made-up LoopBlockingScheme is not expected: '
                     '{}, bufshr wide fetch width for {} {}.'
                     .format(blocking, de.IFM,
                             lbs.bufshr_wide_fetch_width[de.IFM]))
        self.assertGreater(lbs.bufshr_wide_fetch_width[de.IFM], 1,
                           width_msg)

        rot_msg = ('test_bufshr_wide_fetch_example: '
                   'made-up LoopBlockingScheme is not expected: '
                   '{}, bufshr rotation rounds for {} {}'
                   .format(blocking, de.IFM,
                           lbs.bufshr_rot_round_cnt[de.IFM]))
        self.assertGreater(lbs.bufshr_rot_round_cnt[de.IFM], 0, rot_msg)

        # Simulate the accesses and cross-check them against the scheme.
        dram_access, gbuf_access, bufshr_stats = self._sim_access_conv(
            lbs, get_bufshr=True)
        self._verify_bufshr_stats(dram_access, gbuf_access, bufshr_stats,
                                  lbs, bufshr,
                                  'test_bufshr_wide_fetch_example')
def test_bufshr_multisubgrp_example(self):
    ''' Made-up scheme in which a bufshr group holds several subgroups. '''

    # Same pdims as a 4-way IFM sharing setup, but with the partition
    # order reversed; the check below guards the made-up numbers.
    rev_order = list(reversed(range(pe.NUM)))
    part = PartitionScheme(order=rev_order,
                           pdims=((2, 2), (1, 1), (2, 1), (1, 1)))
    bufshr = BufShrScheme(self.par_proc_region, part)
    part_msg = ('test_bufshr_multisubgrp_example: '
                'made-up PartitionScheme is not expected: '
                '{}, bufshr size for {} {}.'
                .format(part, de.IFM, bufshr.size(de.IFM)))
    self.assertEqual(bufshr.size(de.IFM), 4, part_msg)

    # Make a LoopBlockingScheme that has multi subgroups per group for IFM.
    p_nld = self._part_nld(part)
    top_ts = tuple(
        util.idivc(p_nld.loopcnt[lpe], div)
        for lpe, div in zip((le.IFM, le.OFM, le.BAT), (1, 3, 2)))
    bl_ts = (top_ts, (1, 3, 2), (1, 1, 1))
    # At GBUF level, from inner to outer: le.BAT, le.OFM, le.IFM.
    bl_ords = (tuple(range(le.NUM)), (2, 1, 0))

    lbs = LoopBlockingScheme(p_nld, bl_ts, bl_ords,
                             self.resource['PAR'], bufshr,
                             self.options['BUFSHR'])
    self.assertTrue(lbs.is_valid())
    self.assertGreater(sum(lbs.get_noc_access()), 0)

    # Guard the made-up blocking: the group must be strictly larger than
    # one subgroup, and rotation must happen.
    blocking = (bl_ts, bl_ords)

    grp_msg = ('test_bufshr_multisubgrp_example: '
               'made-up LoopBlockingScheme is not expected: '
               '{}, bufshr grp size {}, bufshr subgrp size {}'
               .format(blocking, lbs.bufshr_grp_size,
                       lbs.bufshr_subgrp_size))
    self.assertGreater(lbs.bufshr_grp_size[de.IFM],
                       lbs.bufshr_subgrp_size[de.IFM],
                       grp_msg)

    rot_msg = ('test_bufshr_multisubgrp_example: '
               'made-up LoopBlockingScheme is not expected: '
               '{}, bufshr rotation rounds for {} {}'
               .format(blocking, de.IFM,
                       lbs.bufshr_rot_round_cnt[de.IFM]))
    self.assertGreater(lbs.bufshr_rot_round_cnt[de.IFM], 0, rot_msg)

    # Simulate the accesses and cross-check them against the scheme.
    dram_access, gbuf_access, bufshr_stats = self._sim_access_conv(
        lbs, get_bufshr=True)
    self._verify_bufshr_stats(dram_access, gbuf_access, bufshr_stats,
                              lbs, bufshr,
                              'test_bufshr_multisubgrp_example')
def _lbs(self, bl_ts, bl_ords=None, wlkey='BASE', rsrckey='BASE',
         optkey='BASE'):
    '''
    Build a LoopBlockingScheme from the fixtures selected by the keys.

    `wlkey`, `rsrckey` and `optkey` index `self.nld`, `self.resource` and
    `self.options` respectively; `self.bufshr` is always used as is.
    '''
    if not bl_ords:
        # Nothing (or something falsy) given: fall back to the
        # range(le.NUM) order for both entries.
        natural = tuple(range(le.NUM))
        bl_ords = (natural, natural)

    nld = self.nld[wlkey]
    resource = self.resource[rsrckey]
    bufshr = self.bufshr
    options = self.options[optkey]
    return LoopBlockingScheme(nld, bl_ts, bl_ords, resource, bufshr,
                              options)
def test_bufshr_rotation_example(self):
    ''' Made-up scheme in which bufshr rotates every data category. '''

    # Partitioning picked so that every data category can be shared; the
    # check right below guards these made-up numbers.
    part = PartitionScheme(order=range(pe.NUM),
                           pdims=((2, 1), (1, 2), (1, 1), (2, 1)))
    bufshr = BufShrScheme(self.par_proc_region, part)
    part_msg = ('test_bufshr_rotation_example: '
                'made-up PartitionScheme is not expected: '
                '{}, bufshr size {}'
                .format(part, [bufshr.size(dce) for dce in range(de.NUM)]))
    self.assertTrue(
        not any(bufshr.size(dce) <= 1 for dce in range(de.NUM)),
        part_msg)

    # Make a LoopBlockingScheme that uses bufshr for all data categories.
    p_nld = self._part_nld(part)
    top_ts = tuple(
        util.idivc(p_nld.loopcnt[lpe], div)
        for lpe, div in zip((le.IFM, le.OFM, le.BAT), (6, 9, 2)))
    bl_ts = (top_ts, (3, 3, 2), (2, 3, 1))
    natural = tuple(range(le.NUM))
    bl_ords = (natural, natural)

    lbs = LoopBlockingScheme(p_nld, bl_ts, bl_ords,
                             self.resource['PAR'], bufshr,
                             self.options['BUFSHR'])
    self.assertTrue(lbs.is_valid())
    self.assertGreater(sum(lbs.get_noc_access()), 0)

    # Guard the made-up blocking: every subgroup has more than one
    # member and every top factor is non-trivial.
    lbs_msg = ('test_bufshr_rotation_example: '
               'made-up LoopBlockingScheme is not expected: '
               '{}, top factors {}, bufshr subgrp size {}'
               .format((bl_ts, bl_ords), bl_ts[0],
                       lbs.bufshr_subgrp_size))
    self.assertTrue(
        not (any(sgs <= 1 for sgs in lbs.bufshr_subgrp_size)
             or any(t <= 1 for t in bl_ts[0])),
        lbs_msg)

    # Simulate the accesses and cross-check them against the scheme.
    dram_access, gbuf_access, bufshr_stats = self._sim_access_conv(
        lbs, get_bufshr=True)
    self._verify_bufshr_stats(dram_access, gbuf_access, bufshr_stats,
                              lbs, bufshr,
                              'test_bufshr_rotation_example')
def test_ordered_loops(self):
    '''
    Get ordered_loops.

    Checks a handful of hand-written expectations, then sweeps every
    factor triple in 1..7 against every loop order to verify the
    structural properties of the result.
    '''

    # The expectations below hard-code the enum order. Use a unittest
    # assertion rather than a bare `assert`, which is stripped under
    # `python -O` and would silently drop this precondition.
    self.assertListEqual(list(range(le.NUM)), [le.IFM, le.OFM, le.BAT])

    # (bl_t, bl_ord, expected): loops come out by descending order value,
    # and loops whose factor is 1 are left out.
    fixed_cases = [
        ((3, 5, 2), (2, 0, 1), [(le.IFM, 3), (le.BAT, 2), (le.OFM, 5)]),
        # Trivial loops at different positions.
        ((3, 5, 1), (0, 1, 2), [(le.OFM, 5), (le.IFM, 3)]),
        ((3, 5, 1), (1, 2, 0), [(le.OFM, 5), (le.IFM, 3)]),
        ((3, 5, 1), (0, 2, 1), [(le.OFM, 5), (le.IFM, 3)]),
        # Different loops are trivial.
        ((1, 5, 2), (0, 2, 1), [(le.OFM, 5), (le.BAT, 2)]),
        ((3, 1, 2), (0, 2, 1), [(le.BAT, 2), (le.IFM, 3)]),
        # Multiple trivial loops.
        ((1, 5, 1), (0, 1, 2), [(le.OFM, 5)]),
        ((1, 1, 1), (0, 1, 2), []),
    ]
    for bl_t, bl_ord, expected in fixed_cases:
        self.assertListEqual(
            LoopBlockingScheme.ordered_loops(bl_t, bl_ord), expected,
            'ordered_loops({}, {})'.format(bl_t, bl_ord))

    # Exhaustive sweep over small factors and all loop orders.
    for bl_t, bl_ord in itertools.product(
            itertools.product(range(1, 8), repeat=le.NUM),
            itertools.permutations(range(le.NUM))):

        ord_loops = LoopBlockingScheme.ordered_loops(bl_t, bl_ord)

        # Each entry is a (loop, factor) pair with a non-trivial factor
        # that matches the input; trivial loops are dropped.
        self.assertTrue(all(len(tpl) == 2 for tpl in ord_loops))
        self.assertFalse(any(tpl[1] <= 1 for tpl in ord_loops))
        self.assertEqual(len(ord_loops), le.NUM - bl_t.count(1))
        self.assertTrue(all(tpl[1] == bl_t[tpl[0]] for tpl in ord_loops))

        # The `reverse` and `lpe_only` variants must agree with the
        # default result.
        rev_loops = LoopBlockingScheme.ordered_loops(bl_t, bl_ord,
                                                     reverse=True)
        ord_lpes = LoopBlockingScheme.ordered_loops(bl_t, bl_ord,
                                                    lpe_only=True)
        self.assertEqual(len(rev_loops), len(ord_loops))
        self.assertEqual(len(ord_lpes), len(ord_loops))
        self.assertListEqual(list(reversed(rev_loops)), ord_loops)
        self.assertListEqual([tpl[0] for tpl in ord_loops], ord_lpes)