def test_map_alex_net(self):
        ''' Map AlexNet, JSSC'17, Table III and V. '''

        # Per-layer expected values: (replication size, physical PE set
        # width, active PEs).
        # Replication is denoted in Table III as r and t. Physical PE set
        # width is denoted in Table III as e. Active PEs are given in
        # Table V.
        # In Table III for CONV1, t = 2, but e = 7. Here we simplify to
        # t = 1 and e = 14.
        expected = {
            'conv1': (1 * 1, 14, 154),
            'conv2': (1 * 1, 27, 135),
            'conv3': (1 * 4, 13, 156),
            'conv4': (2 * 2, 13, 156),
            'conv5': (2 * 2, 13, 156),
        }

        batch_size = 4
        occ = 1

        for name, layer in self.convlayers.items():

            exp_repl, exp_width, exp_pes = expected[name]

            ms = MapStrategyEyeriss(layer, batch_size, occ, self.dim_array)

            # Two ways to calculate active PEs.
            # Physical PE set size gives the max active PEs; utilization
            # times the full array size gives the average active PEs.
            max_pes = ms.dim_ppeset.size()
            avg_pes = ms.utilization() * self.dim_array.size()

            self.assertTrue(max_pes == exp_pes or avg_pes == exp_pes)

            self.assertEqual(ms.repl.size(), exp_repl)

            # Note that the physical PE set width is given by flpeset,
            # before scheduling fold.w using repl.h.
            self.assertEqual(ms.dim_flpeset.w, exp_width)
 def _part_nld(self, part, layerkey='PAR'):
     ''' Build the nested loop description of a partitioned layer. '''
     # Partition the layer, then map the per-node sub-layer.
     p_layer, p_batch_size, p_occ = part.part_layer(self.layer[layerkey],
                                                    self.batch_size)
     ms = MapStrategyEyeriss(p_layer, p_batch_size, p_occ,
                             self.resource['PAR'].dim_array)
     # Only the first generated nested loop description is needed.
     return next(ms.gen_nested_loop_desc())
    def test_nested_loop_desc_fold_w(self):
        ''' Generated nested loop description when folding width. '''

        layer = self.convlayers['conv1']
        batch_size = 4
        occ = 1

        ms = MapStrategyEyeriss(layer, batch_size, occ, self.dim_array)

        # conv1 requires no replication and folds only along the width.
        self.assertTupleEqual(ms.repl, (1, 1))
        self.assertEqual(ms.fold.h, 1)
        self.assertGreater(ms.fold.w, 1)

        # Exactly one nld is generated.
        nlds = list(ms.gen_nested_loop_desc())
        self.assertEqual(len(nlds), 1)
        nld = nlds[0]

        # Width folding is scheduled as a larger batch of smaller layers.
        wfold = ms.fold.w
        flayer = ConvLayer(layer.nifm,
                           layer.nofm,
                           (util.idivc(layer.hofm, wfold), layer.wofm),
                           (layer.hfil, layer.wfil),
                           strd=(layer.htrd, layer.wtrd))
        fbatch = batch_size * wfold

        # The folded workload may slightly over-cover the original one.
        locc = layer.total_ops(batch_size) / flayer.total_ops(fbatch)
        self.assertLessEqual(locc, 1)

        self.assertEqual(nld.loopcnt[le.IFM], flayer.nifm)
        self.assertEqual(nld.loopcnt[le.OFM], flayer.nofm)
        self.assertEqual(nld.loopcnt[le.BAT], fbatch)

        self.assertEqual(nld.usize_gbuf[de.FIL], flayer.filter_size())
        self.assertEqual(nld.usize_gbuf[de.IFM], flayer.ifmap_size())
        self.assertEqual(nld.usize_gbuf[de.OFM], flayer.ofmap_size())

        # DRAM and GBUF accesses are equal.
        self.assertTupleEqual(nld.unit_access[me.DRAM],
                              nld.unit_access[me.GBUF])
    def test_nested_loop_desc_occupancy(self):
        ''' Nested loop description with occupancy.

        Occupancy scales unit ops and REGF unit accesses proportionally,
        and leaves unit time, unit buffer sizes, and the non-REGF unit
        accesses unchanged.
        '''

        batch_size = 4
        occ0 = 1
        occ1 = 0.8

        # BUGFIX: dict.values() returns a view in Python 3, which does not
        # support `+`; wrap each in list() so the concatenation works on
        # both Python 2 and Python 3.
        layers = (list(self.convlayers.values())
                  + list(self.fclayers.values())
                  + list(self.lrlayers.values())
                  + list(self.fake_layers.values()))

        for layer in layers:

            ms0 = MapStrategyEyeriss(layer, batch_size, occ0, self.dim_array)
            ms1 = MapStrategyEyeriss(layer, batch_size, occ1, self.dim_array)

            for nld0, nld1 in zip(ms0.gen_nested_loop_desc(),
                                  ms1.gen_nested_loop_desc()):

                # Occupancy does not affect timing or buffer sizes.
                self.assertEqual(nld0.unit_time, nld1.unit_time)

                self.assertTupleEqual(nld0.usize_gbuf, nld1.usize_gbuf)
                self.assertTupleEqual(nld0.usize_regf, nld1.usize_regf)

                # Ops scale proportionally to occupancy:
                # unit_ops0 / occ0 == unit_ops1 / occ1.
                self.assertAlmostEqual(nld0.unit_ops * occ1,
                                       nld1.unit_ops * occ0)

                for mhe in range(me.NUM):
                    for dce in range(de.NUM):
                        if mhe == me.REGF:
                            # REGF accesses scale with occupancy.
                            self.assertAlmostEqual(
                                nld0.unit_access_at_of(mhe, dce) * occ1,
                                nld1.unit_access_at_of(mhe, dce) * occ0)
                        else:
                            # Other hierarchy levels are unaffected.
                            self.assertAlmostEqual(
                                nld0.unit_access_at_of(mhe, dce),
                                nld1.unit_access_at_of(mhe, dce))
    def test_nested_loop_desc_fold_h(self):
        ''' Generated nested loop description when folding height. '''

        layer = self.fake_layers['LGFIL']
        batch_size = 4
        occ = 1

        ms = MapStrategyEyeriss(layer, batch_size, occ, self.dim_array)

        # The large-filter layer requires no replication and folds only
        # along the height.
        self.assertTupleEqual(ms.repl, (1, 1))
        self.assertGreater(ms.fold.h, 1)
        self.assertEqual(ms.fold.w, 1)

        # Exactly one nld is generated.
        nlds = list(ms.gen_nested_loop_desc())
        self.assertEqual(len(nlds), 1)
        nld = nlds[0]

        # Height folding happens within a processing pass, so loop counts
        # and GBUF unit sizes match the unfolded layer.
        hfold = ms.fold.h

        self.assertEqual(nld.loopcnt[le.IFM], layer.nifm)
        self.assertEqual(nld.loopcnt[le.OFM], layer.nofm)
        self.assertEqual(nld.loopcnt[le.BAT], batch_size)

        self.assertEqual(nld.usize_gbuf[de.FIL], layer.filter_size())
        self.assertEqual(nld.usize_gbuf[de.IFM], layer.ifmap_size())
        self.assertEqual(nld.usize_gbuf[de.OFM], layer.ofmap_size())

        # GBUF access is a multiple of DRAM access: fmaps are refetched
        # from GBUF once per height fold, while filters are not.
        self.assertEqual(nld.unit_access_at_of(me.DRAM, de.FIL),
                         nld.unit_access_at_of(me.GBUF, de.FIL))
        self.assertEqual(nld.unit_access_at_of(me.DRAM, de.IFM) * hfold,
                         nld.unit_access_at_of(me.GBUF, de.IFM))
        self.assertEqual(nld.unit_access_at_of(me.DRAM, de.OFM) * hfold,
                         nld.unit_access_at_of(me.GBUF, de.OFM))
    def setUp(self):

        # Workload.
        self.layer = {}
        self.layer['BASE'] = ConvLayer(12, 10, 28, 3)
        self.layer['LGFIL'] = ConvLayer(2, 4, 28, 20)
        self.layer['POOL'] = PoolingLayer(32, 28, 2)
        self.layer['PAR'] = ConvLayer(24, 36, 56, 3)
        self.batch_size = 4

        # Resource.
        dim_array = PhyDim2(16, 16)
        proc_region = NodeRegion(origin=PhyDim2(0, 0),
                                 dim=PhyDim2(1, 1),
                                 type=NodeRegion.PROC)
        data_region = NodeRegion(origin=PhyDim2(0, 0),
                                 dim=PhyDim2(1, 1),
                                 type=NodeRegion.DRAM)
        # A PROC-type region, used in place of a proper data region.
        proc_data_region = NodeRegion(origin=PhyDim2(1, 1),
                                      dim=PhyDim2(1, 1),
                                      type=NodeRegion.PROC)

        def _make_resource(size_gbuf, size_regf, proc=proc_region,
                           src=data_region, dst=data_region,
                           no_time_mux=False):
            ''' Build a Resource, varying only what each variant needs. '''
            return Resource(proc_region=proc,
                            dram_region=data_region,
                            src_data_region=src,
                            dst_data_region=dst,
                            dim_array=dim_array,
                            size_gbuf=size_gbuf,
                            size_regf=size_regf,
                            array_bus_width=float('inf'),
                            dram_bandwidth=float('inf'),
                            no_time_mux=no_time_mux)

        self.resource = {}
        # Typical resource.
        self.resource['BASE'] = _make_resource(65536, 64)
        # Larger resource with sufficient capacity, to make all schemes valid.
        self.resource['LG'] = _make_resource(1024**3, 1024**3)
        # Small resource.
        self.resource['SM'] = _make_resource(4096, 16)
        # Multi-node parallel resource.
        self.resource['PAR'] = _make_resource(
            25000, 64,
            proc=NodeRegion(origin=PhyDim2(0, 0),
                            dim=PhyDim2(4, 2),
                            type=NodeRegion.PROC))
        # Resources with no (proper) data regions.
        self.resource['SRCNOTDATA'] = _make_resource(
            1024**3, 1024**3, src=proc_data_region)
        self.resource['DSTNOTDATA'] = _make_resource(
            1024**3, 1024**3, dst=proc_data_region)
        self.resource['DATALOCAL'] = _make_resource(
            1024**3, 1024**3, src=proc_region, dst=proc_region)
        # Filter pinning.
        self.resource['FILPIN'] = _make_resource(
            1024**3, 1024**3, no_time_mux=True)

        # Nested loop description after mapping.
        self.nld = {}
        for key in ('BASE', 'LGFIL', 'POOL'):
            self.nld[key] = next(
                MapStrategyEyeriss(self.layer[key], self.batch_size, 1,
                                   dim_array).gen_nested_loop_desc())
        # Fake nested loops, with a zero-size filter / ifmap category.
        # Gbuf sizes double as DRAM/GBUF unit accesses.
        for key, usize_gbuf, usize_regf in (
                ('ZERO_FIL', (0, 1000, 800), (0, 3, 1)),
                ('ZERO_IFM', (9, 0, 800), (3, 0, 1))):
            self.nld[key] = NestedLoopDesc(
                loopcnt=(12, 10, 4),
                usize_gbuf=usize_gbuf,
                usize_regf=usize_regf,
                unit_access=(usize_gbuf, usize_gbuf, (3, 9, 7), (1, 1, 1)),
                data_loops=(DataDimLoops(le.IFM, le.OFM),
                            DataDimLoops(le.IFM, le.BAT),
                            DataDimLoops(le.OFM, le.BAT)),
                unit_ops=1,
                unit_time=1)

        # Fake partition scheme.
        self.part = PartitionScheme(range(pe.NUM), ((1, 1), ) * pe.NUM)

        # Fake buffer sharing scheme.
        self.bufshr = BufShrScheme(proc_region, self.part)

        # Options.
        self.options = {}
        # Basic.
        self.options['BASE'] = Option(ntops=2**30)
        # Multiprocessing.
        self.options['MP'] = Option(ntops=2**30, nprocesses=8)
        # Limited top schemes.
        self.options['NTOPS'] = Option(ntops=10)
        # Bypass.
        self.options['BYP'] = Option(sw_gbuf_bypass=(True, ) * 3, ntops=2**30)
        # Bypass solver.
        self.options['BYPSOL'] = Option(sw_gbuf_bypass=(True, ) * 3,
                                        sw_solve_loopblocking=True,
                                        ntops=2**30)
        # Access forwarding.
        self.options['ACCFWD'] = Option(hw_access_forwarding=True, ntops=2**30)
        # Buffer sharing.
        self.options['BUFSHR'] = Option(hw_gbuf_sharing=True, ntops=2**30)
        # Buffer sharing with bypassing.
        self.options['BUFSHR-BYP'] = Option(sw_gbuf_bypass=(True, ) * 3,
                                            hw_gbuf_sharing=True,
                                            ntops=2**30)

        # Constraint.
        self.none_cstr = SchedulingConstraint()
        self.cstr = SchedulingConstraint(topifm=1, topbat=1)

        # Cost.
        self.cost = Cost(mac_op=1,
                         mem_hier=(200, 6, 2, 1),
                         noc_hop=50,
                         idl_unit=50)
# Example #7 (scraper artifact; score: 0)
    def test_nested_loop_desc_sanity(self):
        ''' Generated nested loop description sanity check. '''

        batch_size = 4

        # BUGFIX: dict.values() returns a view in Python 3, which does not
        # support `+`; wrap each in list() so the concatenation works on
        # both Python 2 and Python 3.
        layers = (list(self.convlayers.values())
                  + list(self.fclayers.values())
                  + list(self.lrlayers.values())
                  + list(self.fake_layers.values()))

        for layer in layers:

            ms = MapStrategyEyeriss(layer, batch_size, self.dim_array)

            for nld in ms.gen_nested_loop_desc():

                # Replication reduces numbers of IFM/OFM.
                self.assertGreaterEqual(layer.nifm, nld.loopcnt[le.IFM])
                self.assertGreaterEqual(layer.nofm, nld.loopcnt[le.OFM])
                # Folding increases batch size.
                self.assertEqual(nld.loopcnt[le.BAT] % batch_size, 0)

                # Total and unit ops.
                self.assertAlmostEqual(nld.total_ops(),
                                       layer.total_ops(batch_size))
                self.assertAlmostEqual(nld.unit_ops * util.prod(nld.loopcnt),
                                       layer.total_ops(batch_size))

                # Unit time and unit ops.
                # The difference is due to the loop occupation, which is not
                # counted in utilization.
                self.assertGreaterEqual(
                    nld.unit_time * ms.utilization() * self.dim_array.size(),
                    nld.unit_ops)

                # Total access at DRAM.
                self.assertAlmostEqual(
                    nld.total_access_at_of(me.DRAM, de.FIL),
                    layer.total_filter_size()
                    if isinstance(layer, ConvLayer) else 0)
                # IFM may have refetch due to folding.
                self.assertGreaterEqual(
                    nld.total_access_at_of(me.DRAM, de.IFM) + 1e-7,
                    layer.total_ifmap_size(batch_size))
                self.assertAlmostEqual(nld.total_access_at_of(me.DRAM, de.OFM),
                                       layer.total_ofmap_size(batch_size))

                # Unit access to REGF.
                self.assertAlmostEqual(
                    nld.unit_access[me.REGF][de.FIL] * util.prod(nld.loopcnt),
                    layer.total_ops(batch_size) if isinstance(
                        layer, ConvLayer) else 0)
                self.assertAlmostEqual(
                    nld.unit_access[me.REGF][de.IFM] * util.prod(nld.loopcnt),
                    layer.total_ops(batch_size))
                self.assertAlmostEqual(
                    nld.unit_access[me.REGF][de.OFM] * util.prod(nld.loopcnt),
                    layer.total_ops(batch_size))

                # Unit GBUF size and unit access to DRAM.
                self.assertTrue(
                    all(us >= ua for us, ua in zip(nld.usize_gbuf,
                                                   nld.unit_access[me.DRAM])))

                # Unit REGF size.
                if isinstance(layer, ConvLayer):
                    # See JSSC'17, IV. A. Dimensions Beyond 2-D in PE Array. 1).
                    self.assertEqual(nld.usize_regf[de.FIL], layer.wfil)
                    self.assertEqual(nld.usize_regf[de.IFM], layer.wfil)
                    self.assertEqual(nld.usize_regf[de.OFM], 1)

                # Data dimension loops.
                if isinstance(layer, ConvLayer):
                    self.assertEqual(nld.data_loops[de.FIL],
                                     DataDimLoops(le.IFM, le.OFM))
                    self.assertEqual(nld.data_loops[de.IFM],
                                     DataDimLoops(le.IFM, le.BAT))
                    self.assertEqual(nld.data_loops[de.OFM],
                                     DataDimLoops(le.OFM, le.BAT))
                else:
                    # BUGFIX: this branch used to repeat the ConvLayer
                    # isinstance check and was therefore unreachable. The
                    # assertions describe non-conv (local-region) layers,
                    # which have no filters.
                    self.assertEqual(nld.data_loops[de.FIL], DataDimLoops())
                    self.assertEqual(nld.data_loops[de.IFM],
                                     DataDimLoops(le.OFM, le.BAT))
                    self.assertEqual(nld.data_loops[de.OFM],
                                     DataDimLoops(le.OFM, le.BAT))
# Example #8 (scraper artifact; score: 0)
 def test_invalid_layer(self):
     ''' Constructor with invalid layer type. '''
     # A generic Layer is neither conv nor local-region, so the mapping
     # constructor must reject it with a TypeError.
     with self.assertRaisesRegexp(TypeError, 'MapEyeriss: .*type.*'):
         MapStrategyEyeriss(Layer(1, 1), 4, self.dim_array)
    def setUp(self):

        # Workload.
        self.layer = {}
        self.layer['BASE'] = ConvLayer(12, 10, 28, 3)
        self.layer['LGFIL'] = ConvLayer(2, 4, 28, 20)
        self.layer['POOL'] = PoolingLayer(32, 28, 2)
        self.batch_size = 4

        # Resource.
        dim_array = PhyDim2(16, 16)
        proc_region = NodeRegion(origin=PhyDim2(0, 0),
                                 dim=PhyDim2(1, 1),
                                 type=NodeRegion.PROC)
        data_regions = (NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(1, 1),
                                   type=NodeRegion.DATA), )

        def _make_resource(size_gbuf, size_regf):
            ''' Build a Resource differing only in buffer sizes. '''
            return Resource(proc_region=proc_region,
                            data_regions=data_regions,
                            dim_array=dim_array,
                            size_gbuf=size_gbuf,
                            size_regf=size_regf)

        self.resource = {}
        # Typical resource.
        self.resource['BASE'] = _make_resource(65536, 64)
        # Larger resource with sufficient capacity, to make all schemes valid.
        self.resource['LG'] = _make_resource(1024**3, 1024**3)
        # Small resource.
        self.resource['SM'] = _make_resource(4096, 16)

        # Nested loop description after mapping.
        self.nld = {}
        for key in ('BASE', 'LGFIL', 'POOL'):
            self.nld[key] = next(
                MapStrategyEyeriss(self.layer[key], self.batch_size,
                                   dim_array).gen_nested_loop_desc())
        # Fake nested loops, with a zero-size filter / ifmap category.
        # Gbuf sizes double as DRAM/GBUF unit accesses.
        for key, usize_gbuf, usize_regf in (
                ('ZERO_FIL', (0, 1000, 800), (0, 3, 1)),
                ('ZERO_IFM', (9, 0, 800), (3, 0, 1))):
            self.nld[key] = NestedLoopDesc(
                loopcnt=(12, 10, 4),
                usize_gbuf=usize_gbuf,
                usize_regf=usize_regf,
                unit_access=(usize_gbuf, usize_gbuf, (3, 9, 7), (1, 1, 1)),
                data_loops=(DataDimLoops(le.IFM, le.OFM),
                            DataDimLoops(le.IFM, le.BAT),
                            DataDimLoops(le.OFM, le.BAT)),
                unit_ops=1,
                unit_time=1)

        # Options.
        self.options = {}
        # Basic.
        self.options['BASE'] = Option(ntops=2**30)
        # Multiprocessing.
        self.options['MP'] = Option(ntops=2**30, nprocesses=8)
        # Limited top schemes.
        self.options['NTOPS'] = Option(ntops=10)
        # Bypass.
        self.options['BYP'] = Option(sw_gbuf_bypass=(True, ) * 3, ntops=2**30)
        # Bypass solver.
        self.options['BYPSOL'] = Option(sw_gbuf_bypass=(True, ) * 3,
                                        sw_solve_loopblocking=True,
                                        ntops=2**30)

        # Cost.
        self.cost = Cost(mac_op=1,
                         mem_hier=(200, 6, 2, 1),
                         noc_hop=50,
                         unit_static=50)

        # Partition occupation.
        self.part_occ = 0.91