def test_bufshr_wide_fetch_example(self):
        '''
        Example scheme in which IFM is buffer-shared with wide fetch.

        Two sets of lower-level blocking factors are tried. For each, the
        made-up scheme is first checked to have the properties this example
        relies on, and then the simulated access counts are handed to
        `_verify_bufshr_stats` together with the scheme.
        '''
        # Failure-message templates, filled in right before each check.
        part_tmpl = ('test_bufshr_wide_fetch_example: '
                     'made-up PartitionScheme is not expected: '
                     '{}, bufshr size for {} {}.')
        subgrp_tmpl = ('test_bufshr_wide_fetch_example: '
                       'made-up LoopBlockingScheme is not expected: '
                       '{}, bufshr subgrp size for {} {}.')
        width_tmpl = ('test_bufshr_wide_fetch_example: '
                      'made-up LoopBlockingScheme is not expected: '
                      '{}, bufshr wide fetch width for {} {}.')
        rot_tmpl = ('test_bufshr_wide_fetch_example: '
                    'made-up LoopBlockingScheme is not expected: '
                    '{}, bufshr rotation rounds for {} {}')

        # The partitioning is expected to give a bufshr size of 4 for IFM.
        part = PartitionScheme(order=range(pe.NUM),
                               pdims=((2, 2), (1, 1), (2, 1), (1, 1)))
        bufshr = BufShrScheme(self.par_proc_region, part)
        ifm_shr_size = bufshr.size(de.IFM)
        self.assertEqual(
            ifm_shr_size, 4,
            part_tmpl.format(part, de.IFM, bufshr.size(de.IFM)))

        lower_ts_pairs = (((3, 3, 1), (1, 1, 2)), ((1, 3, 2), (3, 1, 1)))
        for ts1, ts2 in lower_ts_pairs:
            # Build a LoopBlockingScheme that should use wide fetch for IFM.
            p_nld = self._part_nld(part)
            # Top-level factor per loop: util.idivc of the loop count by the
            # product of the two lower-level factors.
            top_ts = tuple(
                util.idivc(p_nld.loopcnt[idx], ts1[idx] * ts2[idx])
                for idx in range(le.NUM))
            bl_ts = (top_ts, ts1, ts2)
            # At GBUF level, from inner to outer: le.BAT, le.IFM, le.OFM.
            bl_ords = (tuple(range(le.NUM)), (1, 2, 0))
            scheme_desc = (bl_ts, bl_ords)
            lbs = LoopBlockingScheme(p_nld, bl_ts, bl_ords,
                                     self.resource['PAR'], bufshr,
                                     self.options['BUFSHR'])

            self.assertTrue(lbs.is_valid())
            noc_total = sum(lbs.get_noc_access())
            self.assertGreater(noc_total, 0)

            # All 4 sharing nodes form a single subgroup.
            subgrp_size = lbs.bufshr_subgrp_size[de.IFM]
            self.assertEqual(
                subgrp_size, 4,
                subgrp_tmpl.format(scheme_desc, de.IFM,
                                   lbs.bufshr_subgrp_size[de.IFM]))

            # The wide fetch width must be non-trivial.
            fetch_width = lbs.bufshr_wide_fetch_width[de.IFM]
            self.assertGreater(
                fetch_width, 1,
                width_tmpl.format(scheme_desc, de.IFM,
                                  lbs.bufshr_wide_fetch_width[de.IFM]))

            # There must be at least some rotation.
            rot_rounds = lbs.bufshr_rot_round_cnt[de.IFM]
            self.assertGreater(
                rot_rounds, 0,
                rot_tmpl.format(scheme_desc, de.IFM,
                                lbs.bufshr_rot_round_cnt[de.IFM]))

            # Sim.
            sim_result = self._sim_access_conv(lbs, get_bufshr=True)
            dram_access, gbuf_access, bufshr_stats = sim_result

            self._verify_bufshr_stats(dram_access, gbuf_access, bufshr_stats,
                                      lbs, bufshr,
                                      'test_bufshr_wide_fetch_example')
    def test_bufshr_multisubgrp_example(self):
        '''
        Example scheme whose IFM bufshr group holds more than one subgroup.

        The made-up scheme is first checked to have a group size larger than
        its subgroup size and a positive rotation round count for IFM; then
        the simulated access counts are handed to `_verify_bufshr_stats`.
        '''
        # Failure-message templates, filled in right before each check.
        part_tmpl = ('test_bufshr_multisubgrp_example: '
                     'made-up PartitionScheme is not expected: '
                     '{}, bufshr size for {} {}.')
        grp_tmpl = ('test_bufshr_multisubgrp_example: '
                    'made-up LoopBlockingScheme is not expected: '
                    '{}, bufshr grp size {}, bufshr subgrp size {}')
        rot_tmpl = ('test_bufshr_multisubgrp_example: '
                    'made-up LoopBlockingScheme is not expected: '
                    '{}, bufshr rotation rounds for {} {}')

        # The partitioning is expected to give a bufshr size of 4 for IFM.
        part = PartitionScheme(order=list(reversed(range(pe.NUM))),
                               pdims=((2, 2), (1, 1), (2, 1), (1, 1)))
        bufshr = BufShrScheme(self.par_proc_region, part)
        ifm_shr_size = bufshr.size(de.IFM)
        self.assertEqual(
            ifm_shr_size, 4,
            part_tmpl.format(part, de.IFM, bufshr.size(de.IFM)))

        # Build a LoopBlockingScheme with multiple subgroups per group for
        # IFM. The top-level factors are listed in IFM, OFM, BAT order.
        p_nld = self._part_nld(part)
        top_ts = (util.idivc(p_nld.loopcnt[le.IFM], 1),
                  util.idivc(p_nld.loopcnt[le.OFM], 3),
                  util.idivc(p_nld.loopcnt[le.BAT], 2))
        bl_ts = (top_ts, (1, 3, 2), (1, 1, 1))
        # At GBUF level, from inner to outer: le.BAT, le.OFM, le.IFM.
        bl_ords = (tuple(range(le.NUM)), (2, 1, 0))
        scheme_desc = (bl_ts, bl_ords)
        lbs = LoopBlockingScheme(p_nld, bl_ts, bl_ords, self.resource['PAR'],
                                 bufshr, self.options['BUFSHR'])

        self.assertTrue(lbs.is_valid())
        noc_total = sum(lbs.get_noc_access())
        self.assertGreater(noc_total, 0)

        # The group must be strictly larger than one subgroup.
        grp_size = lbs.bufshr_grp_size[de.IFM]
        subgrp_size = lbs.bufshr_subgrp_size[de.IFM]
        self.assertGreater(
            grp_size, subgrp_size,
            grp_tmpl.format(scheme_desc, lbs.bufshr_grp_size,
                            lbs.bufshr_subgrp_size))

        # There must be at least some rotation.
        rot_rounds = lbs.bufshr_rot_round_cnt[de.IFM]
        self.assertGreater(
            rot_rounds, 0,
            rot_tmpl.format(scheme_desc, de.IFM,
                            lbs.bufshr_rot_round_cnt[de.IFM]))

        # Sim.
        sim_result = self._sim_access_conv(lbs, get_bufshr=True)
        dram_access, gbuf_access, bufshr_stats = sim_result

        self._verify_bufshr_stats(dram_access, gbuf_access, bufshr_stats, lbs,
                                  bufshr, 'test_bufshr_multisubgrp_example')
 def _lbs(self,
          bl_ts,
          bl_ords=None,
          wlkey='BASE',
          rsrckey='BASE',
          optkey='BASE'):
     '''
     Build a LoopBlockingScheme from fixture entries selected by key.

     `wlkey`, `rsrckey` and `optkey` index `self.nld`, `self.resource` and
     `self.options` respectively; `self.bufshr` is always used as is. When
     `bl_ords` is falsy, `tuple(range(le.NUM))` is used for both of its
     entries.
     '''
     if not bl_ords:
         default_ord = tuple(range(le.NUM))
         bl_ords = (default_ord, default_ord)

     nested_loop_desc = self.nld[wlkey]
     resource = self.resource[rsrckey]
     bufshr = self.bufshr
     options = self.options[optkey]
     return LoopBlockingScheme(nested_loop_desc, bl_ts, bl_ords, resource,
                               bufshr, options)
    def test_bufshr_rotation_example(self):
        '''
        Example scheme using bufshr with rotation for every data category.

        The made-up scheme is first checked to have a subgroup size above 1
        for every data category and non-trivial top-level blocking factors;
        then the simulated access counts are handed to
        `_verify_bufshr_stats`.
        '''
        # Failure-message templates, filled in right before each check.
        part_tmpl = ('test_bufshr_rotation_example: '
                     'made-up PartitionScheme is not expected: '
                     '{}, bufshr size {}')
        lbs_tmpl = ('test_bufshr_rotation_example: '
                    'made-up LoopBlockingScheme is not expected: '
                    '{}, top factors {}, bufshr subgrp size {}')

        # The partitioning must allow bufshr for all data categories.
        part = PartitionScheme(order=range(pe.NUM),
                               pdims=((2, 1), (1, 2), (1, 1), (2, 1)))
        bufshr = BufShrScheme(self.par_proc_region, part)
        all_shared = all(bufshr.size(dce) > 1 for dce in range(de.NUM))
        self.assertTrue(
            all_shared,
            part_tmpl.format(
                part, [bufshr.size(dce) for dce in range(de.NUM)]))

        # Build a LoopBlockingScheme that uses bufshr for all data
        # categories. The top-level factors are listed in IFM, OFM, BAT
        # order; each divisor is the product of the two lower-level factors.
        p_nld = self._part_nld(part)
        top_ts = (util.idivc(p_nld.loopcnt[le.IFM], 6),
                  util.idivc(p_nld.loopcnt[le.OFM], 9),
                  util.idivc(p_nld.loopcnt[le.BAT], 2))
        bl_ts = (top_ts, (3, 3, 2), (2, 3, 1))
        bl_ords = (tuple(range(le.NUM)), tuple(range(le.NUM)))
        scheme_desc = (bl_ts, bl_ords)
        lbs = LoopBlockingScheme(p_nld, bl_ts, bl_ords, self.resource['PAR'],
                                 bufshr, self.options['BUFSHR'])

        self.assertTrue(lbs.is_valid())
        noc_total = sum(lbs.get_noc_access())
        self.assertGreater(noc_total, 0)

        # Every data category is shared, and every top-level loop is
        # non-trivial.
        as_intended = (all(size > 1 for size in lbs.bufshr_subgrp_size)
                       and all(fac > 1 for fac in bl_ts[0]))
        self.assertTrue(
            as_intended,
            lbs_tmpl.format(scheme_desc, bl_ts[0], lbs.bufshr_subgrp_size))

        # Sim.
        sim_result = self._sim_access_conv(lbs, get_bufshr=True)
        dram_access, gbuf_access, bufshr_stats = sim_result

        self._verify_bufshr_stats(dram_access, gbuf_access, bufshr_stats, lbs,
                                  bufshr, 'test_bufshr_rotation_example')
    def test_ordered_loops(self):
        '''
        Check LoopBlockingScheme.ordered_loops.

        A few hand-written cases pin the exact result. Then every
        combination of blocking factors in 1..7 and loop order permutation
        is checked for consistency, including the `reverse` and `lpe_only`
        variants.
        '''
        # The expectations below rely on this numbering of the loops.
        assert list(range(le.NUM)) == [le.IFM, le.OFM, le.BAT]

        # Each entry is (blocking factors, loop orders, expected result).
        fixed_cases = [
            ((3, 5, 2), (2, 0, 1), [(le.IFM, 3), (le.BAT, 2), (le.OFM, 5)]),
            # Trivial loops at different positions.
            ((3, 5, 1), (0, 1, 2), [(le.OFM, 5), (le.IFM, 3)]),
            ((3, 5, 1), (1, 2, 0), [(le.OFM, 5), (le.IFM, 3)]),
            ((3, 5, 1), (0, 2, 1), [(le.OFM, 5), (le.IFM, 3)]),
            # Different loops are trivial.
            ((1, 5, 2), (0, 2, 1), [(le.OFM, 5), (le.BAT, 2)]),
            ((3, 1, 2), (0, 2, 1), [(le.BAT, 2), (le.IFM, 3)]),
            # Multiple trivial loops.
            ((1, 5, 1), (0, 1, 2), [(le.OFM, 5)]),
            ((1, 1, 1), (0, 1, 2), []),
        ]
        for factors, orders, expected in fixed_cases:
            self.assertListEqual(
                LoopBlockingScheme.ordered_loops(factors, orders), expected)

        # Exhaustive sweep over factors and orders.
        for bl_t in itertools.product(range(1, 8), repeat=3):
            for bl_ord in itertools.permutations(range(le.NUM)):

                ord_loops = LoopBlockingScheme.ordered_loops(bl_t, bl_ord)
                # Entries are pairs whose second item is a factor above 1.
                self.assertTrue(all(len(pair) == 2 for pair in ord_loops))
                self.assertFalse(any(pair[1] <= 1 for pair in ord_loops))
                # Exactly the loops with factor 1 are left out.
                self.assertEqual(len(ord_loops), le.NUM - bl_t.count(1))
                # The second item is the factor of the loop named by the
                # first item.
                self.assertTrue(
                    all(pair[1] == bl_t[pair[0]] for pair in ord_loops))

                rev_loops = LoopBlockingScheme.ordered_loops(
                    bl_t, bl_ord, reverse=True)
                ord_lpes = LoopBlockingScheme.ordered_loops(
                    bl_t, bl_ord, lpe_only=True)
                self.assertEqual(len(rev_loops), len(ord_loops))
                self.assertEqual(len(ord_lpes), len(ord_loops))
                # `reverse` flips the sequence; `lpe_only` keeps only the
                # first item of each pair.
                self.assertListEqual(list(reversed(rev_loops)), ord_loops)
                self.assertListEqual([pair[0] for pair in ord_loops],
                                     ord_lpes)