def test_map_alex_net(self):
    ''' Map AlexNet, JSSC'17, Table III and V. '''
    # Replication is denoted in Table III as r and t. Physical PE set width
    # is denoted in Table III as e.
    # In Table III for CONV1, t = 2, but e = 7. Here we simplify to t = 1
    # and e = 14.
    # Per-layer expectations:
    #   (replication size, physical PE set width, active PEs in Table V).
    expected = {
        'conv1': (1 * 1, 14, 154),
        'conv2': (1 * 1, 27, 135),
        'conv3': (1 * 4, 13, 156),
        'conv4': (2 * 2, 13, 156),
        'conv5': (2 * 2, 13, 156),
    }

    batch_size = 4
    occ = 1

    for name, layer in self.convlayers.items():
        exp_repl, exp_width, exp_active = expected[name]

        ms = MapStrategyEyeriss(layer, batch_size, occ, self.dim_array)

        # Two ways to calculate active PEs.
        # Physical PE set size. Max active PEs.
        max_active = ms.dim_ppeset.size()
        # Utilization. Average active PEs.
        avg_active = ms.utilization() * self.dim_array.size()

        self.assertTrue(max_active == exp_active
                        or avg_active == exp_active)
        self.assertEqual(ms.repl.size(), exp_repl)
        # Note that the physical PE set width is given by flpeset, before
        # scheduling fold.w using repl.h.
        self.assertEqual(ms.dim_flpeset.w, exp_width)
def _part_nld(self, part, layerkey='PAR'):
    ''' Make a partitioned NestedLoopDesc and its partition occupation. '''
    # Partition the layer, then map the partitioned workload onto the
    # parallel resource's PE array.
    p_layer, p_batch_size, p_occ = part.part_layer(
        self.layer[layerkey], self.batch_size)
    ms = MapStrategyEyeriss(p_layer, p_batch_size, p_occ,
                            self.resource['PAR'].dim_array)
    return next(ms.gen_nested_loop_desc())
def test_nested_loop_desc_fold_w(self):
    ''' Generated nested loop description when folding width. '''
    layer = self.convlayers['conv1']
    batch_size = 4
    occ = 1

    ms = MapStrategyEyeriss(layer, batch_size, occ, self.dim_array)
    self.assertTupleEqual(ms.repl, (1, 1))
    self.assertEqual(ms.fold.h, 1)
    self.assertGreater(ms.fold.w, 1)

    # Only 1 possible nld.
    nlds = list(ms.gen_nested_loop_desc())
    self.assertEqual(len(nlds), 1)
    nld = nlds[0]

    # Fold to batch size: width folding maps the extra ofmap rows onto a
    # larger effective batch of a shorter layer.
    wfold = ms.fold.w
    flayer = ConvLayer(layer.nifm, layer.nofm,
                       (util.idivc(layer.hofm, wfold), layer.wofm),
                       (layer.hfil, layer.wfil),
                       strd=(layer.htrd, layer.wtrd))
    fbatch = batch_size * wfold
    locc = layer.total_ops(batch_size) / flayer.total_ops(fbatch)
    self.assertLessEqual(locc, 1)

    self.assertEqual(nld.loopcnt[le.IFM], flayer.nifm)
    self.assertEqual(nld.loopcnt[le.OFM], flayer.nofm)
    self.assertEqual(nld.loopcnt[le.BAT], fbatch)

    self.assertEqual(nld.usize_gbuf[de.FIL], flayer.filter_size())
    self.assertEqual(nld.usize_gbuf[de.IFM], flayer.ifmap_size())
    self.assertEqual(nld.usize_gbuf[de.OFM], flayer.ofmap_size())

    # DRAM and GBUF accesses are equal.
    self.assertTupleEqual(nld.unit_access[me.DRAM],
                          nld.unit_access[me.GBUF])
def test_nested_loop_desc_occupancy(self):
    ''' Nested loop description with occupancy. '''
    batch_size = 4
    occ0 = 1
    occ1 = 0.8

    # Wrap .values() in list() so the concatenation also works on
    # Python 3, where dict.values() returns a view that does not
    # support the + operator.
    layers = (list(self.convlayers.values())
              + list(self.fclayers.values())
              + list(self.lrlayers.values())
              + list(self.fake_layers.values()))

    for layer in layers:
        ms0 = MapStrategyEyeriss(layer, batch_size, occ0, self.dim_array)
        ms1 = MapStrategyEyeriss(layer, batch_size, occ1, self.dim_array)

        for nld0, nld1 in zip(ms0.gen_nested_loop_desc(),
                              ms1.gen_nested_loop_desc()):
            # Occupancy does not affect unit time or buffered data sizes.
            self.assertEqual(nld0.unit_time, nld1.unit_time)
            self.assertTupleEqual(nld0.usize_gbuf, nld1.usize_gbuf)
            self.assertTupleEqual(nld0.usize_regf, nld1.usize_regf)

            # Unit ops scale proportionally with occupancy.
            self.assertAlmostEqual(nld0.unit_ops * occ1,
                                   nld1.unit_ops * occ0)

            for mhe in range(me.NUM):
                for dce in range(de.NUM):
                    if mhe == me.REGF:
                        # REGF accesses scale with ops, thus occupancy.
                        self.assertAlmostEqual(
                            nld0.unit_access_at_of(mhe, dce) * occ1,
                            nld1.unit_access_at_of(mhe, dce) * occ0)
                    else:
                        # Other levels fetch the full data regardless.
                        self.assertAlmostEqual(
                            nld0.unit_access_at_of(mhe, dce),
                            nld1.unit_access_at_of(mhe, dce))
def test_nested_loop_desc_fold_h(self):
    ''' Generated nested loop description when folding height. '''
    layer = self.fake_layers['LGFIL']
    batch_size = 4
    occ = 1

    ms = MapStrategyEyeriss(layer, batch_size, occ, self.dim_array)
    self.assertTupleEqual(ms.repl, (1, 1))
    self.assertGreater(ms.fold.h, 1)
    self.assertEqual(ms.fold.w, 1)

    # Only 1 possible nld.
    nlds = list(ms.gen_nested_loop_desc())
    self.assertEqual(len(nlds), 1)
    nld = nlds[0]

    # Fold within processing pass, so loop counts and GBUF unit sizes
    # match the unfolded layer.
    hfold = ms.fold.h

    self.assertEqual(nld.loopcnt[le.IFM], layer.nifm)
    self.assertEqual(nld.loopcnt[le.OFM], layer.nofm)
    self.assertEqual(nld.loopcnt[le.BAT], batch_size)

    self.assertEqual(nld.usize_gbuf[de.FIL], layer.filter_size())
    self.assertEqual(nld.usize_gbuf[de.IFM], layer.ifmap_size())
    self.assertEqual(nld.usize_gbuf[de.OFM], layer.ofmap_size())

    # GBUF access is multiple of DRAM access.
    self.assertEqual(nld.unit_access_at_of(me.DRAM, de.FIL),
                     nld.unit_access_at_of(me.GBUF, de.FIL))
    self.assertEqual(nld.unit_access_at_of(me.DRAM, de.IFM) * hfold,
                     nld.unit_access_at_of(me.GBUF, de.IFM))
    self.assertEqual(nld.unit_access_at_of(me.DRAM, de.OFM) * hfold,
                     nld.unit_access_at_of(me.GBUF, de.OFM))
def setUp(self):
    '''
    Build the shared fixtures: example layers, a set of Resource configs
    exercising different region/capacity combinations, pre-mapped nested
    loop descriptions, partition/buffer-sharing schemes, Options,
    scheduling constraints, and a Cost model.
    '''
    # Workload.
    self.layer = {}
    self.layer['BASE'] = ConvLayer(12, 10, 28, 3)
    self.layer['LGFIL'] = ConvLayer(2, 4, 28, 20)
    self.layer['POOL'] = PoolingLayer(32, 28, 2)
    self.layer['PAR'] = ConvLayer(24, 36, 56, 3)
    self.batch_size = 4

    # Resource.
    self.resource = {}
    dim_array = PhyDim2(16, 16)
    # Single-node processing region at the origin.
    proc_region = NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                             type=NodeRegion.PROC)
    # Single-node DRAM region, reused as src/dst data regions below.
    data_region = NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                             type=NodeRegion.DRAM)
    # Typical resource.
    self.resource['BASE'] = Resource(proc_region=proc_region,
                                     dram_region=data_region,
                                     src_data_region=data_region,
                                     dst_data_region=data_region,
                                     dim_array=dim_array,
                                     size_gbuf=65536,
                                     size_regf=64,
                                     array_bus_width=float('inf'),
                                     dram_bandwidth=float('inf'),
                                     no_time_mux=False)
    # Larger resource with sufficient capacity, to make all schemes valid.
    self.resource['LG'] = Resource(proc_region=proc_region,
                                   dram_region=data_region,
                                   src_data_region=data_region,
                                   dst_data_region=data_region,
                                   dim_array=dim_array,
                                   size_gbuf=1024**3,
                                   size_regf=1024**3,
                                   array_bus_width=float('inf'),
                                   dram_bandwidth=float('inf'),
                                   no_time_mux=False)
    # Small resource.
    self.resource['SM'] = Resource(proc_region=proc_region,
                                   dram_region=data_region,
                                   src_data_region=data_region,
                                   dst_data_region=data_region,
                                   dim_array=dim_array,
                                   size_gbuf=4096,
                                   size_regf=16,
                                   array_bus_width=float('inf'),
                                   dram_bandwidth=float('inf'),
                                   no_time_mux=False)
    # Multi-node parallel resource.
    self.resource['PAR'] = Resource(proc_region=NodeRegion(
        origin=PhyDim2(0, 0), dim=PhyDim2(4, 2), type=NodeRegion.PROC),
                                    dram_region=data_region,
                                    src_data_region=data_region,
                                    dst_data_region=data_region,
                                    dim_array=dim_array,
                                    size_gbuf=25000,
                                    size_regf=64,
                                    array_bus_width=float('inf'),
                                    dram_bandwidth=float('inf'),
                                    no_time_mux=False)
    # Resource with no data regions.
    # A PROC-type region (off-origin) used where a test needs a src/dst
    # region that is NOT a DRAM data region.
    proc_data_region = NodeRegion(origin=PhyDim2(1, 1), dim=PhyDim2(1, 1),
                                  type=NodeRegion.PROC)
    self.resource['SRCNOTDATA'] = Resource(
        proc_region=proc_region,
        dram_region=data_region,
        src_data_region=proc_data_region,
        dst_data_region=data_region,
        dim_array=dim_array,
        size_gbuf=1024**3,
        size_regf=1024**3,
        array_bus_width=float('inf'),
        dram_bandwidth=float('inf'),
        no_time_mux=False)
    self.resource['DSTNOTDATA'] = Resource(
        proc_region=proc_region,
        dram_region=data_region,
        src_data_region=data_region,
        dst_data_region=proc_data_region,
        dim_array=dim_array,
        size_gbuf=1024**3,
        size_regf=1024**3,
        array_bus_width=float('inf'),
        dram_bandwidth=float('inf'),
        no_time_mux=False)
    # Src/dst data live in the processing region itself.
    self.resource['DATALOCAL'] = Resource(proc_region=proc_region,
                                          dram_region=data_region,
                                          src_data_region=proc_region,
                                          dst_data_region=proc_region,
                                          dim_array=dim_array,
                                          size_gbuf=1024**3,
                                          size_regf=1024**3,
                                          array_bus_width=float('inf'),
                                          dram_bandwidth=float('inf'),
                                          no_time_mux=False)
    # Filter pinning.
    self.resource['FILPIN'] = Resource(proc_region=proc_region,
                                       dram_region=data_region,
                                       src_data_region=data_region,
                                       dst_data_region=data_region,
                                       dim_array=dim_array,
                                       size_gbuf=1024**3,
                                       size_regf=1024**3,
                                       array_bus_width=float('inf'),
                                       dram_bandwidth=float('inf'),
                                       no_time_mux=True)

    # Nested loop description after mapping.
    # Each generator is consumed only for its first scheme.
    self.nld = {}
    self.nld['BASE'] = next(
        MapStrategyEyeriss(self.layer['BASE'], self.batch_size, 1,
                           dim_array).gen_nested_loop_desc())
    self.nld['LGFIL'] = next(
        MapStrategyEyeriss(self.layer['LGFIL'], self.batch_size, 1,
                           dim_array).gen_nested_loop_desc())
    self.nld['POOL'] = next(
        MapStrategyEyeriss(self.layer['POOL'], self.batch_size, 1,
                           dim_array).gen_nested_loop_desc())
    # Fake nested loop, with zero filter size.
    self.nld['ZERO_FIL'] = NestedLoopDesc(
        loopcnt=(12, 10, 4),
        usize_gbuf=(0, 1000, 800),
        usize_regf=(0, 3, 1),
        unit_access=((0, 1000, 800), (0, 1000, 800), (3, 9, 7),
                     (1, 1, 1)),
        data_loops=(DataDimLoops(le.IFM, le.OFM),
                    DataDimLoops(le.IFM, le.BAT),
                    DataDimLoops(le.OFM, le.BAT)),
        unit_ops=1, unit_time=1)
    # Fake nested loop, with zero ifmap size.
    self.nld['ZERO_IFM'] = NestedLoopDesc(
        loopcnt=(12, 10, 4),
        usize_gbuf=(9, 0, 800),
        usize_regf=(3, 0, 1),
        unit_access=((9, 0, 800), (9, 0, 800), (3, 9, 7),
                     (1, 1, 1)),
        data_loops=(DataDimLoops(le.IFM, le.OFM),
                    DataDimLoops(le.IFM, le.BAT),
                    DataDimLoops(le.OFM, le.BAT)),
        unit_ops=1, unit_time=1)

    # Fake partition scheme.
    self.part = PartitionScheme(range(pe.NUM), ((1, 1), ) * pe.NUM)

    # Fake buffer sharing scheme.
    self.bufshr = BufShrScheme(proc_region, self.part)

    # Options.
    self.options = {}
    # Basic.
    self.options['BASE'] = Option(ntops=2**30)
    # Multiprocessing.
    self.options['MP'] = Option(ntops=2**30, nprocesses=8)
    # Limited top schemes.
    self.options['NTOPS'] = Option(ntops=10)
    # Bypass.
    self.options['BYP'] = Option(sw_gbuf_bypass=(True, ) * 3,
                                 ntops=2**30)
    # Bypass solver.
    self.options['BYPSOL'] = Option(sw_gbuf_bypass=(True, ) * 3,
                                    sw_solve_loopblocking=True,
                                    ntops=2**30)
    # Access forwarding.
    self.options['ACCFWD'] = Option(hw_access_forwarding=True,
                                    ntops=2**30)
    # Buffer sharing.
    self.options['BUFSHR'] = Option(hw_gbuf_sharing=True,
                                    ntops=2**30)
    # Buffer sharing with bypassing.
    self.options['BUFSHR-BYP'] = Option(sw_gbuf_bypass=(True, ) * 3,
                                        hw_gbuf_sharing=True,
                                        ntops=2**30)

    # Constraint.
    self.none_cstr = SchedulingConstraint()
    self.cstr = SchedulingConstraint(topifm=1, topbat=1)

    # Cost.
    self.cost = Cost(mac_op=1,
                     mem_hier=(200, 6, 2, 1),
                     noc_hop=50,
                     idl_unit=50)
def test_nested_loop_desc_sanity(self):
    ''' Generated nested loop description sanity check. '''
    batch_size = 4

    # Wrap .values() in list() so the concatenation also works on
    # Python 3, where dict.values() returns a view that does not
    # support the + operator.
    layers = (list(self.convlayers.values())
              + list(self.fclayers.values())
              + list(self.lrlayers.values())
              + list(self.fake_layers.values()))

    for layer in layers:
        # Fix: the constructor also takes the occupation before the PE
        # array dimension; use full occupation (1) like the sibling tests.
        ms = MapStrategyEyeriss(layer, batch_size, 1, self.dim_array)

        for nld in ms.gen_nested_loop_desc():

            # Replication reduces numbers of IFM/OFM.
            self.assertGreaterEqual(layer.nifm, nld.loopcnt[le.IFM])
            self.assertGreaterEqual(layer.nofm, nld.loopcnt[le.OFM])
            # Folding increases batch size.
            self.assertEqual(nld.loopcnt[le.BAT] % batch_size, 0)

            # Total and unit ops.
            self.assertAlmostEqual(nld.total_ops(),
                                   layer.total_ops(batch_size))
            self.assertAlmostEqual(nld.unit_ops * util.prod(nld.loopcnt),
                                   layer.total_ops(batch_size))

            # Unit time and unit ops.
            # The difference is due to the loop occupation, which is not
            # counted in utilization.
            self.assertGreaterEqual(
                nld.unit_time * ms.utilization() * self.dim_array.size(),
                nld.unit_ops)

            # Total access at DRAM.
            self.assertAlmostEqual(
                nld.total_access_at_of(me.DRAM, de.FIL),
                layer.total_filter_size()
                if isinstance(layer, ConvLayer) else 0)
            # IFM may have refetch due to folding.
            self.assertGreaterEqual(
                nld.total_access_at_of(me.DRAM, de.IFM) + 1e-7,
                layer.total_ifmap_size(batch_size))
            self.assertAlmostEqual(
                nld.total_access_at_of(me.DRAM, de.OFM),
                layer.total_ofmap_size(batch_size))

            # Unit access to REGF.
            self.assertAlmostEqual(
                nld.unit_access[me.REGF][de.FIL] * util.prod(nld.loopcnt),
                layer.total_ops(batch_size)
                if isinstance(layer, ConvLayer) else 0)
            self.assertAlmostEqual(
                nld.unit_access[me.REGF][de.IFM] * util.prod(nld.loopcnt),
                layer.total_ops(batch_size))
            self.assertAlmostEqual(
                nld.unit_access[me.REGF][de.OFM] * util.prod(nld.loopcnt),
                layer.total_ops(batch_size))

            # Unit GBUF size and unit access to DRAM.
            self.assertTrue(
                all(us >= ua
                    for us, ua in zip(nld.usize_gbuf,
                                      nld.unit_access[me.DRAM])))

            # Unit REGF size.
            if isinstance(layer, ConvLayer):
                # See JSSC'17, IV. A. Dimensions Beyond 2-D in PE Array.
                # 1).
                self.assertEqual(nld.usize_regf[de.FIL], layer.wfil)
                self.assertEqual(nld.usize_regf[de.IFM], layer.wfil)
                self.assertEqual(nld.usize_regf[de.OFM], 1)

            # Data dimension loops.
            if isinstance(layer, ConvLayer):
                self.assertEqual(nld.data_loops[de.FIL],
                                 DataDimLoops(le.IFM, le.OFM))
                self.assertEqual(nld.data_loops[de.IFM],
                                 DataDimLoops(le.IFM, le.BAT))
                self.assertEqual(nld.data_loops[de.OFM],
                                 DataDimLoops(le.OFM, le.BAT))
            else:
                # Fix: was `elif isinstance(layer, ConvLayer)`, identical
                # to the `if` condition above and therefore unreachable.
                # Non-CONV layers carry no filter data.
                self.assertEqual(nld.data_loops[de.FIL], DataDimLoops())
                self.assertEqual(nld.data_loops[de.IFM],
                                 DataDimLoops(le.OFM, le.BAT))
                self.assertEqual(nld.data_loops[de.OFM],
                                 DataDimLoops(le.OFM, le.BAT))
def test_invalid_layer(self):
    ''' Constructor with invalid layer type. '''
    with self.assertRaisesRegexp(TypeError, 'MapEyeriss: .*type.*'):
        # Fix: pass the occupation argument (1) as the other tests do, so
        # the constructor reaches its layer-type check instead of failing
        # on argument count with a non-matching message.
        _ = MapStrategyEyeriss(Layer(1, 1), 4, 1, self.dim_array)
def setUp(self):
    '''
    Build the shared fixtures: example layers, Resource configs of
    different capacities, pre-mapped nested loop descriptions, Options,
    a Cost model, and a partition occupation value.
    '''
    # NOTE(review): this setUp uses an older Resource/Cost signature
    # (data_regions=, unit_static=) than the other setUp in this file
    # (dram_region=/src_data_region=/..., idl_unit=) — presumably the two
    # test classes target different API versions; confirm before unifying.
    # Workload.
    self.layer = {}
    self.layer['BASE'] = ConvLayer(12, 10, 28, 3)
    self.layer['LGFIL'] = ConvLayer(2, 4, 28, 20)
    self.layer['POOL'] = PoolingLayer(32, 28, 2)
    self.batch_size = 4

    # Resource.
    self.resource = {}
    dim_array = PhyDim2(16, 16)
    proc_region = NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                             type=NodeRegion.PROC)
    data_regions = (NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                               type=NodeRegion.DATA), )
    # Typical resource.
    self.resource['BASE'] = Resource(proc_region=proc_region,
                                     data_regions=data_regions,
                                     dim_array=dim_array,
                                     size_gbuf=65536, size_regf=64)
    # Larger resource with sufficient capacity, to make all schemes valid.
    self.resource['LG'] = Resource(proc_region=proc_region,
                                   data_regions=data_regions,
                                   dim_array=dim_array,
                                   size_gbuf=1024**3, size_regf=1024**3)
    # Small resource.
    self.resource['SM'] = Resource(proc_region=proc_region,
                                   data_regions=data_regions,
                                   dim_array=dim_array,
                                   size_gbuf=4096, size_regf=16)

    # Nested loop description after mapping.
    # Each generator is consumed only for its first scheme.
    self.nld = {}
    self.nld['BASE'] = next(
        MapStrategyEyeriss(self.layer['BASE'], self.batch_size,
                           dim_array).gen_nested_loop_desc())
    self.nld['LGFIL'] = next(
        MapStrategyEyeriss(self.layer['LGFIL'], self.batch_size,
                           dim_array).gen_nested_loop_desc())
    self.nld['POOL'] = next(
        MapStrategyEyeriss(self.layer['POOL'], self.batch_size,
                           dim_array).gen_nested_loop_desc())
    # Fake nested loop, with zero filter size.
    self.nld['ZERO_FIL'] = NestedLoopDesc(
        loopcnt=(12, 10, 4),
        usize_gbuf=(0, 1000, 800),
        usize_regf=(0, 3, 1),
        unit_access=((0, 1000, 800), (0, 1000, 800), (3, 9, 7),
                     (1, 1, 1)),
        data_loops=(DataDimLoops(le.IFM, le.OFM),
                    DataDimLoops(le.IFM, le.BAT),
                    DataDimLoops(le.OFM, le.BAT)),
        unit_ops=1, unit_time=1)
    # Fake nested loop, with zero ifmap size.
    self.nld['ZERO_IFM'] = NestedLoopDesc(
        loopcnt=(12, 10, 4),
        usize_gbuf=(9, 0, 800),
        usize_regf=(3, 0, 1),
        unit_access=((9, 0, 800), (9, 0, 800), (3, 9, 7),
                     (1, 1, 1)),
        data_loops=(DataDimLoops(le.IFM, le.OFM),
                    DataDimLoops(le.IFM, le.BAT),
                    DataDimLoops(le.OFM, le.BAT)),
        unit_ops=1, unit_time=1)

    # Options.
    self.options = {}
    # Basic.
    self.options['BASE'] = Option(ntops=2**30)
    # Multiprocessing.
    self.options['MP'] = Option(ntops=2**30, nprocesses=8)
    # Limited top schemes.
    self.options['NTOPS'] = Option(ntops=10)
    # Bypass.
    self.options['BYP'] = Option(sw_gbuf_bypass=(True, ) * 3,
                                 ntops=2**30)
    # Bypass solver.
    self.options['BYPSOL'] = Option(sw_gbuf_bypass=(True, ) * 3,
                                    sw_solve_loopblocking=True,
                                    ntops=2**30)

    # Cost.
    self.cost = Cost(mac_op=1,
                     mem_hier=(200, 6, 2, 1),
                     noc_hop=50,
                     unit_static=50)

    # Partition occupation.
    self.part_occ = 0.91