def test_eyeriss_isscc16(self):
    ''' Reproduce Eyeriss ISSCC'16 paper Fig. 14.5.6, JSSC'17 paper Table V. '''
    network = self.alex_net
    batch_size = 4
    # Single Eyeriss chip: one node, 12x14 PE array, 108 kB gbuf.
    # Buffer sizes are in 16-bit words, hence the // 2.
    resource = Resource(
        proc_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                               type=NodeRegion.PROC),
        dram_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                               type=NodeRegion.DRAM),
        src_data_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                                   type=NodeRegion.DRAM),
        dst_data_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                                   type=NodeRegion.DRAM),
        dim_array=PhyDim2(12, 14),
        size_gbuf=108 * 1024 // 2,  # 108 kB
        size_regf=261,  # 225 + 12 + 24
        array_bus_width=float('inf'),
        dram_bandwidth=float('inf'),
        no_time_mux=False,
    )
    cost = Cost(mac_op=2e-12,
                mem_hier=(460e-12, 15e-12, 4e-12, 1e-12),  # pJ/16-b
                noc_hop=0,
                idl_unit=30e-3 / 200e6)  # 30 mW GBUF + REGF
    # nnd = NNDataflow(network, batch_size, resource, cost,
    #                  self.map_strategy)
    # tops, _ = nnd.schedule_search(self.options)
    # self.assertTrue(tops)
    # dfsch = tops[0]
    # Use tool
    # NOTE: drives the search through the command-line tool instead of
    # calling NNDataflow directly; the flags mirror the resource/cost
    # objects built above.
    from nn_dataflow.tools import nn_dataflow_search as nnds
    cmd = '--batch 4 --nodes 1 1 --array 12 14 --regf 522 --gbuf 110592 --mem-type 3D --disable-bypass i o f --hop-cost 0 --op-cost 2e-12 --hier-cost 460e-12 15e-12 4e-12 1e-12 --unit-idle-cost 1.5e-10 alex_net'
    args = nnds.argparser().parse_args(cmd.split())
    res = nnds.do_scheduling(args)
    dfsch = res['schedules']

    ## Check results.

    # Results as stats of the rows in the table.
    header = 'Power, Processing Latency, Ops, Active PEs, Filter size'
    stats = {}
    for layer in ['conv{}'.format(i) for i in range(1, 6)]:
        # Aggregate all partitions of a conv layer into one row.
        onchip_cost = 0
        time = 0
        ops = 0
        fil_size = 0
        for layer_part in network:
            if not layer_part or not layer_part.startswith(layer):
                continue
            sr = dfsch[layer_part]
            # On-chip cost excludes the DRAM access component.
            onchip_cost += sr.total_cost \
                    - sr.total_accesses[me.DRAM] * cost.mem_hier[me.DRAM]
            time += sr.total_time
            ops += sr.total_ops
            fil_size += network[layer_part].total_filter_size()
        # Power at a 200 MHz clock, reported in mW.
        power = onchip_cost / (time / 200e6) * 1e3  # mW
        active_pes = int(ops / time)
        stats[layer] = []
        stats[layer].append(power)
        stats[layer].append(time / 200.e3)  # cycles to ms
        stats[layer].append(ops / 1e6)  # to MOPs
        stats[layer].append(active_pes)
        stats[layer].append(fil_size / 1e3)  # to k

    # Check.
    stats_ref = {
        'conv1': [332, 16.5, 421.66, 151, 34.8],  # Act PE 154
        'conv2': [288, 39.2, 895.79, 135, 307.2],
        'conv3': [266, 21.8, 598.1, 156, 884.7],
        'conv4': [235, 16.0, 448.6, 156, 663.6],
        'conv5': [236, 10.0, 299.0, 156, 442.4],
    }
    for layer in stats:
        # Power and latency only need to be within a band of the paper's
        # numbers; ops, active PEs, and filter size must match closely.
        success = (0.6 * stats_ref[layer][0] < stats[layer][0]
                   < stats_ref[layer][0]) \
                and (0.8 * stats_ref[layer][1] < stats[layer][1]
                     < stats_ref[layer][1]) \
                and all(abs(a - b) < 0.1 for a, b
                        in zip(stats[layer][2:], stats_ref[layer][2:]))
        self.assertTrue(
            success,
            'test_eyeriss_isscc16: '
            'stats diff in layer {}.\n'
            'header: {}\n'
            'actual: {}\nref: {}'.format(layer, header, stats[layer],
                                         stats_ref[layer]))
def test_invalid_origin(self):
    ''' Invalid origin. '''
    # A plain tuple is not accepted as the origin; it must be a PhyDim2.
    bad_origin = (1, 3)
    with self.assertRaisesRegexp(TypeError, 'NodeRegion: .*origin.*'):
        _ = NodeRegion(dim=PhyDim2(4, 4), origin=bad_origin,
                       type=NodeRegion.PROC)
def setUp(self):
    # Test network: conv c1 feeds two parallel pooling layers p1/p2,
    # which both feed FC layer f1.
    self.network = Network('test_net')
    self.network.set_input_layer(InputLayer(3, 224))
    self.network.add('c1', ConvLayer(3, 64, 224, 3))
    self.network.add('p1', PoolingLayer(64, 7, 32), prevs='c1')
    self.network.add('p2', PoolingLayer(64, 7, 32), prevs='c1')
    self.network.add('f1', FCLayer(128, 1000, 7), prevs=['p1', 'p2'])
    self.batch_size = 4
    input_layer = self.network.input_layer()
    # Input data layout: the whole input fmap range in a single 2x1 DRAM
    # region with a trivial (all-ones) partitioning.
    self.input_layout = DataLayout(
        frngs=(FmapRange((0, 0, 0, 0),
                         FmapPosition(b=self.batch_size,
                                      n=input_layer.nofm,
                                      h=input_layer.hofm,
                                      w=input_layer.wofm)), ),
        regions=(NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(2, 1),
                            type=NodeRegion.DRAM), ),
        parts=(PartitionScheme(order=range(pe.NUM),
                               pdims=[(1, 1)] * pe.NUM), ))
    c1_layer = self.network['c1']
    # Hand-built scheduling result for c1 with made-up stats values.
    self.c1res = SchedulingResult(
        scheme=OrderedDict([('cost', 1.5), ('time', 200.), ('ops', 4.),
                            ('num_nodes', 4),
                            ('cost_op', 0.5), ('cost_access', 1.),
                            ('cost_noc', 0), ('cost_static', 0),
                            ('proc_time', 200), ('bus_time', 0),
                            ('dram_time', 0),
                            ('access', [[7, 8, 9]] * me.NUM),
                            ('remote_gbuf_access', [0] * 3),
                            ('total_nhops', [4, 5, 6]),
                            ('fetch', [[1, 1, 1], [2, 2, 2]]),
                            ('ti', [2, 2, 3]),
                            ('to', [1, 2, 3]),
                            ('tb', [1, 2, 3]),
                            ('tvals', [[2, 1, 1], [2, 2, 2], [3, 3, 3]]),
                            ('orders', [range(3)] * 2),
                           ]),
        ofmap_layout=DataLayout(
            frngs=(FmapRange((0, 0, 0, 0),
                             FmapPosition(b=self.batch_size,
                                          n=c1_layer.nofm,
                                          h=c1_layer.hofm,
                                          w=c1_layer.wofm)), ),
            regions=(NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 2),
                                type=NodeRegion.DRAM), ),
            parts=(PartitionScheme(order=range(pe.NUM),
                                   pdims=[(1, 1)] * pe.NUM), )),
        sched_seq=(0, 0, 0))
    p1_layer = self.network['p1']
    # Hand-built scheduling result for p1 (smaller, fractional stats).
    self.p1res = SchedulingResult(
        scheme=OrderedDict([('cost', 0.6), ('time', 5), ('ops', 0.1),
                            ('num_nodes', 2),
                            ('cost_op', 0.1), ('cost_access', 0.5),
                            ('cost_noc', 0), ('cost_static', 0),
                            ('proc_time', 5), ('bus_time', 0),
                            ('dram_time', 0),
                            ('access', [[.7, .8, .9]] * me.NUM),
                            ('remote_gbuf_access', [0] * 3),
                            ('total_nhops', [.4, .5, .6]),
                            ('fetch', [[1, 1, 1], [2, 2, 2]]),
                            ('ti', [2, 2, 3]),
                            ('to', [1, 2, 3]),
                            ('tb', [1, 2, 3]),
                            ('tvals', [[2, 1, 1], [2, 2, 2], [3, 3, 3]]),
                            ('orders', [range(3)] * 2),
                           ]),
        ofmap_layout=DataLayout(
            frngs=(FmapRange((0, 0, 0, 0),
                             FmapPosition(b=self.batch_size,
                                          n=p1_layer.nofm,
                                          h=p1_layer.hofm,
                                          w=p1_layer.wofm)), ),
            regions=(NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 2),
                                type=NodeRegion.DRAM), ),
            parts=(PartitionScheme(order=range(pe.NUM),
                                   pdims=[(1, 1)] * pe.NUM), )),
        sched_seq=(0, 1, 0))
    # p2 reuses p1's scheme/layout, only the schedule sequence differs.
    self.p2res = SchedulingResult(scheme=self.p1res.scheme,
                                  ofmap_layout=self.p1res.ofmap_layout,
                                  sched_seq=(0, 2, 0))
    self.dtfl = NNDataflowScheme(self.network, self.input_layout)
    self.dtfl['c1'] = self.c1res
    self.dtfl['p1'] = self.p1res
    self.dtfl['p2'] = self.p2res
def test_use_fwd(self):
    ''' Use access forwarding. '''
    layer = self.layers['BASE']
    part = PartitionScheme(order=(pe.BATP, pe.INPP, pe.OUTP, pe.OFMP),
                           pdims=((2, 1), (2, 4), (1, 2), (2, 1)))
    nr = NodeRegion(origin=PhyDim2(0, 0), dim=part.dim(),
                    type=NodeRegion.PROC)
    # Place all data sources/destinations far away, so the remote hop
    # distance dominates the hop counts.
    far_dist = 1000
    ilayout = self._make_data_layout(layer.nifm, layer.hifm, layer.wifm,
                                     PhyDim2(-far_dist, 0), (1, 1), (1, 1),
                                     PhyDim2(1, 1))
    olayout = self._make_data_layout(layer.nofm, layer.hofm, layer.wofm,
                                     PhyDim2(0, -far_dist), (1, 1), (1, 1),
                                     PhyDim2(1, 1))
    filter_nodes = frozenset([PhyDim2(far_dist, 0),
                              PhyDim2(0, far_dist)])
    nhops_base = partition.unit_nhops_to_proc_region(
        layer, self.batch_size, nr, part, filter_nodes,
        ilayout, olayout, self.options['BASE'])
    nhops_accfwd = partition.unit_nhops_to_proc_region(
        layer, self.batch_size, nr, part, filter_nodes,
        ilayout, olayout, self.options['ACCFWD'])
    nhops_bufshr = partition.unit_nhops_to_proc_region(
        layer, self.batch_size, nr, part, filter_nodes,
        ilayout, olayout, self.options['BUFSHR'])
    # Access forwarding and buffer sharing give identical hop counts.
    for dce in range(de.NUM):
        self.assertEqual(nhops_accfwd[dce], nhops_bufshr[dce])
    # In the basic access scheme, FIL and IFM are independently fetched,
    # resulting in repeating remote fetch. OFM are merged locally and only
    # stored back remotely once.
    self.assertGreater(
        nhops_base[de.FIL],
        layer.total_filter_size() * far_dist
        * part.size(pe.BATP) * part.size(pe.OFMP) * 0.8)
    self.assertGreater(
        nhops_base[de.IFM],
        layer.total_ifmap_size(self.batch_size) * far_dist
        * part.size(pe.OUTP) * 0.8)
    p_layer, p_batch_size, _ = part.part_layer(layer, self.batch_size)
    # With forwarding, everyone is only remotely fetched once.
    self.assertLess(
        nhops_accfwd[de.FIL],
        p_layer.total_filter_size() * part.size(pe.INPP, pe.OUTP)
        * (far_dist + nr.dim.size()))
    self.assertLess(
        nhops_accfwd[de.IFM],
        p_layer.total_ifmap_size(p_batch_size)
        * part.size(pe.INPP, pe.OFMP, pe.BATP)
        * (far_dist + nr.dim.size()))
    self.assertLess(
        nhops_accfwd[de.OFM],
        p_layer.total_ofmap_size(p_batch_size)
        * part.size(pe.OUTP, pe.OFMP, pe.BATP)
        * (far_dist + nr.dim.size()))
def test_dim(self):
    ''' Get dim. '''
    # Per-index dimensions of both partition schemes.
    ps1_dims = (PhyDim2(2, 3), PhyDim2(3, 1), PhyDim2(1, 5), PhyDim2(5, 2))
    for idx, expected in enumerate(ps1_dims):
        self.assertEqual(self.ps1.dim(idx), expected)
    ps2_dims = (PhyDim2(2, 2), PhyDim2(5, 5), PhyDim2(3, 3), PhyDim2(1, 1))
    for idx, expected in enumerate(ps2_dims):
        self.assertEqual(self.ps2.dim(idx), expected)
    # Multiple indices multiply the per-index dims together.
    prod_012 = PhyDim2(2, 3) * PhyDim2(3, 1) * PhyDim2(1, 5)
    self.assertEqual(self.ps1.dim(0, 1, 2), prod_012)
    # No arguments means the product over all indices.
    self.assertEqual(self.ps1.dim(), prod_012 * PhyDim2(5, 2))
    # Index order does not affect the result.
    self.assertEqual(self.ps1.dim(0, 1, 2), self.ps1.dim(1, 2, 0))
def test_hop_dist(self):
    ''' Get hop distance. '''
    # Manhattan distance: |14 - 5| + |12 - 20| = 9 + 8, and it is
    # symmetric in the two endpoints.
    dim_a = PhyDim2(14, 12)
    dim_b = PhyDim2(5, 20)
    for src, dst in ((dim_a, dim_b), (dim_b, dim_a)):
        self.assertEqual(src.hop_dist(dst), 9 + 8, 'hop_dist')
def test_origin(self):
    ''' Origin. '''
    layer = self.layers['BASE']
    part = PartitionScheme(order=(pe.BATP, pe.INPP, pe.OUTP, pe.OFMP),
                           pdims=((1, 1), (1, 1), (1, 1), (1, 1)))
    # Case 1: region origin and data layouts offset by 3 in each
    # direction.
    nr = NodeRegion(origin=PhyDim2(3, 3), dim=part.dim(),
                    type=NodeRegion.PROC)
    ilayout = self._make_data_layout(layer.nifm, layer.hifm, layer.wifm,
                                     PhyDim2(-3, -3), (1, 1), (1, 1),
                                     PhyDim2(1, 1))
    olayout = self._make_data_layout(layer.nofm, layer.hofm, layer.wofm,
                                     PhyDim2(3, 3), (1, 1), (1, 1),
                                     PhyDim2(1, 1))
    filter_nodes = frozenset([PhyDim2(3, -3)])
    nhops_1 = partition.unit_nhops_to_proc_region(layer, self.batch_size,
                                                  nr, part, filter_nodes,
                                                  ilayout, olayout,
                                                  self.options['BASE'])
    # Case 2: all offsets doubled to 6, so every distance doubles.
    nr = NodeRegion(origin=PhyDim2(6, 6), dim=part.dim(),
                    type=NodeRegion.PROC)
    ilayout = self._make_data_layout(layer.nifm, layer.hifm, layer.wifm,
                                     PhyDim2(-6, -6), (1, 1), (1, 1),
                                     PhyDim2(1, 1))
    olayout = self._make_data_layout(layer.nofm, layer.hofm, layer.wofm,
                                     PhyDim2(6, 6), (1, 1), (1, 1),
                                     PhyDim2(1, 1))
    filter_nodes = frozenset([PhyDim2(6, -6)])
    nhops_2 = partition.unit_nhops_to_proc_region(layer, self.batch_size,
                                                  nr, part, filter_nodes,
                                                  ilayout, olayout,
                                                  self.options['BASE'])
    # Hop counts scale linearly with distance.
    self.assertListEqual(nhops_2, [n * 2 for n in nhops_1])
def test_merge(self):
    ''' Merge. '''
    fr = FmapRange((0, ) * 4, (30, ) * 4)
    # A second layout with disjoint fmap ranges to merge with self.dly.
    frm = FmapRangeMap()
    frm.add(FmapRange((0, 0, 0, 0), (4, 1, 16, 16)),
            (PhyDim2(0, 0), PhyDim2(1, 1)))
    frm.add(FmapRange((0, 1, 0, 0), (4, 3, 16, 16)),
            (PhyDim2(1, 0), PhyDim2(2, 2)))
    dly = DataLayout(origin=PhyDim2(-1, -1), frmap=frm,
                     type=NodeRegion.DATA)
    # '|' merge: concatenate fmap ranges; commutative up to type.
    mdly1 = self.dly.merge('|', dly)
    mdly2 = dly.merge('|', self.dly)
    # The merged layout takes the type of the left operand.
    self.assertEqual(mdly1.type, self.dly.type, 'merge |: type')
    self.assertEqual(mdly2.type, dly.type, 'merge |: type')
    self.assertEqual(mdly1.frmap.complete_fmap_range(),
                     mdly2.frmap.complete_fmap_range(),
                     'merge |: complete_fmap_range')
    # '|' merge sums the fmap range sizes.
    self.assertEqual(
        mdly1.frmap.complete_fmap_range().size(),
        self.frm.complete_fmap_range().size()
        + frm.complete_fmap_range().size(),
        'merge |: complete_fmap_range: size')
    # Hop counts are also additive across the two merged layouts.
    self.assertEqual(mdly1.total_transfer_nhops(fr, PhyDim2(0, 0)),
                     mdly2.total_transfer_nhops(fr, PhyDim2(0, 0)),
                     'merge |: nhops')
    self.assertEqual(
        mdly1.total_transfer_nhops(fr, PhyDim2(0, 0)),
        self.dly.total_transfer_nhops(fr, PhyDim2(0, 0))
        + dly.total_transfer_nhops(fr, PhyDim2(0, 0)),
        'merge |: nhops')
    # Extend frm so its complete range matches self.dly's for '+' merge.
    frm.add(FmapRange((0, 3, 0, 0), (4, 4, 16, 16)),
            (PhyDim2(1, 3), PhyDim2(2, 1), PhyDim2(-1, -2)))
    # Only type differs from self.dly.
    sdly = DataLayout(origin=self.dly.origin, frmap=self.dly.frmap,
                      type=NodeRegion.PROC)
    dly = DataLayout(origin=PhyDim2(-1, -1), frmap=frm,
                     type=NodeRegion.PROC)
    # '+' merge: overlay layouts covering the same fmap range.
    mdly1 = sdly.merge('+', dly)
    mdly2 = dly.merge('+', sdly)
    self.assertEqual(mdly1.type, sdly.type, 'merge +: type')
    self.assertEqual(mdly2.type, dly.type, 'merge +: type')
    self.assertEqual(mdly1.frmap.complete_fmap_range(),
                     mdly2.frmap.complete_fmap_range(),
                     'merge +: complete_fmap_range')
    # '+' merge does not grow the complete fmap range.
    self.assertEqual(mdly1.frmap.complete_fmap_range().size(),
                     self.frm.complete_fmap_range().size(),
                     'merge +: complete_fmap_range: size')
    self.assertEqual(mdly1.total_transfer_nhops(fr, PhyDim2(0, 0)),
                     mdly2.total_transfer_nhops(fr, PhyDim2(0, 0)),
                     'merge +: nhops')
    self.assertEqual(
        mdly1.total_transfer_nhops(fr, PhyDim2(0, 0)),
        sdly.total_transfer_nhops(fr, PhyDim2(0, 0))
        + dly.total_transfer_nhops(fr, PhyDim2(0, 0)),
        'merge +: nhops')
def test_merge_invalid_type(self):
    ''' Merge invalid. '''
    # Either merge operator rejects a non-DataLayout operand.
    for op, other in (('|', self.frm), ('+', PhyDim2(1, 3))):
        with self.assertRaisesRegexp(TypeError, 'DataLayout: .*other.*'):
            _ = self.dly.merge(op, other)
def do_scheduling(args):
    '''
    Get optimal scheduling for given problem. Return a result schedule.

    Builds the network, resource, cost, and option descriptions from the
    parsed command-line `args`, runs the dataflow schedule search, and
    returns an OrderedDict of results (or None if no valid dataflow is
    found).
    '''

    ## Network.

    network = import_network(args.net)
    batch_size = args.batch

    ## Resource.

    dim_nodes = PhyDim2(*args.nodes)
    dim_array = PhyDim2(*args.array)
    # Sizes of gbuf and regf are in words.
    word = (args.word + 7) // 8
    size_gbuf = args.gbuf // word
    size_regf = args.regf // word

    array_bus_width = args.bus_width // args.word
    if not array_bus_width:
        # Zero bus width means unlimited.
        array_bus_width = float('inf')
    dram_bandwidth = args.dram_bw / word

    proc_region = NodeRegion(dim=dim_nodes, origin=PhyDim2(0, 0),
                             type=NodeRegion.PROC)

    if args.mem_type == '2D':
        # Memory nodes are on two sides.
        data_region = NodeRegion(dim=PhyDim2(2, 2), origin=PhyDim2(0, 0),
                                 dist=dim_nodes - PhyDim2(1, 1),
                                 type=NodeRegion.DRAM)
        assert data_region.rel2abs(PhyDim2(1, 1)) + PhyDim2(1, 1) \
                == proc_region.dim
    elif args.mem_type == '3D':
        # Memory nodes are on the top.
        data_region = NodeRegion(dim=dim_nodes, origin=PhyDim2(0, 0),
                                 type=NodeRegion.DRAM)
    else:
        # FIX: previously fell through with `data_region` unbound,
        # raising an obscure NameError below; fail fast instead.
        raise ValueError('Unknown memory type: {}'.format(args.mem_type))

    resource = Resource(proc_region=proc_region,
                        dram_region=data_region,
                        src_data_region=data_region,
                        dst_data_region=data_region,
                        dim_array=dim_array,
                        size_gbuf=size_gbuf,
                        size_regf=size_regf,
                        array_bus_width=array_bus_width,
                        dram_bandwidth=dram_bandwidth,
                        no_time_mux=False)

    ## Cost.

    hier_cost = [0] * me.NUM
    hier_cost[me.DRAM] = args.hier_cost[0]
    hier_cost[me.GBUF] = args.hier_cost[1]
    hier_cost[me.ITCN] = args.hier_cost[2]
    hier_cost[me.REGF] = args.hier_cost[3]
    cost = Cost(mac_op=args.op_cost,
                mem_hier=tuple(hier_cost),
                noc_hop=args.hop_cost,
                idl_unit=args.unit_idle_cost)

    ## Options.

    # Gbuf bypass flags: a data category bypasses gbuf unless disabled.
    bypass = [True] * de.NUM
    bypass[de.IFM] = 'i' not in args.disable_bypass
    bypass[de.OFM] = 'o' not in args.disable_bypass
    bypass[de.FIL] = 'f' not in args.disable_bypass
    options = Option(sw_gbuf_bypass=tuple(bypass),
                     sw_solve_loopblocking=args.solve_loopblocking,
                     hw_access_forwarding=args.enable_access_forwarding,
                     hw_gbuf_sharing=args.enable_gbuf_sharing,
                     hw_gbuf_save_writeback=args.enable_save_writeback,
                     partition_hybrid=args.hybrid_partition,
                     partition_batch=args.batch_partition,
                     partition_ifmaps=args.ifmaps_partition,
                     partition_interlayer=args.interlayer_partition,
                     layer_pipeline_time_ovhd=
                     args.layer_pipeline_time_overhead,
                     layer_pipeline_max_degree=
                     args.layer_pipeline_max_degree,
                     layer_pipeline_opt=not args.disable_interlayer_opt,
                     opt_goal=args.goal.lower(),
                     ntops=args.top,
                     nprocesses=args.processes,
                     verbose=args.verbose)

    ## Search schedules.

    nnd = NNDataflow(network, batch_size, resource, cost,
                     MapStrategyEyeriss)
    tbeg = time.time()
    tops, cache_stats = nnd.schedule_search(options)
    tend = time.time()
    telapsed = tend - tbeg

    if not tops:
        sys.stderr.write('No valid dataflow found.\n')
        return None

    top = tops[0]

    ## Write results.

    res_map = OrderedDict()
    res_map['version'] = get_version(with_local=True)
    res_map['net'] = args.net
    res_map['batch'] = args.batch
    res_map['resource'] = resource._asdict()
    res_map['cost'] = cost._asdict()
    res_map['options'] = options._asdict()
    res_map['cache_stats'] = cache_stats
    res_map['elapsed'] = telapsed
    stats = stats_dict(top, cost)
    for key, val in stats.items():
        res_map[key] = val

    return res_map
def test_view(self):
    ''' Get view. '''
    frm = self.frm.copy()
    frm.add(FmapRange((0, 0, 0, 16), (4, 4, 16, 20)),
            (PhyDim2(2, 2), PhyDim2(3, 3)))
    frm.add(FmapRange((0, 0, 16, 0), (4, 4, 20, 20)),
            (PhyDim2(1, 1), PhyDim2(3, 3), PhyDim2(5, 5)))
    dly = DataLayout(origin=PhyDim2(1, 1), frmap=frm,
                     type=NodeRegion.DATA)
    # Baseline values before shifting the view.
    cfr = dly.frmap.complete_fmap_range()
    counters = dly.frmap.rget_counter(cfr)
    nhops = dly.total_transfer_nhops(cfr, PhyDim2(1, 2))
    # Shifting the origin by (-1, -1) moves it from (1, 1) to (0, 0); the
    # fmap contents and counters are unchanged, and nhops match when the
    # destination is shifted by the same offset.
    dly1 = dly.view(PhyDim2(-1, -1))
    self.assertEqual(dly1.origin, PhyDim2(0, 0), 'view: origin')
    self.assertEqual(dly1.type, dly.type, 'view: type')
    self.assertEqual(dly1.frmap.complete_fmap_range(), cfr,
                     'view: complete_fmap_range')
    self.assertDictEqual(dly1.frmap.rget_counter(cfr), counters,
                         'view: counter')
    self.assertEqual(
        dly1.total_transfer_nhops(cfr, PhyDim2(1, 2) + PhyDim2(-1, -1)),
        nhops, 'view: nhops')
    # Same invariants for a different shift.
    dly2 = dly.view(PhyDim2(3, 1))
    self.assertEqual(dly2.type, dly.type, 'view: type')
    self.assertEqual(dly2.frmap.complete_fmap_range(), cfr,
                     'view: complete_fmap_range')
    self.assertDictEqual(dly2.frmap.rget_counter(cfr), counters,
                         'view: counter')
    self.assertEqual(
        dly2.total_transfer_nhops(cfr, PhyDim2(1, 2) + PhyDim2(3, 1)),
        nhops, 'view: nhops')
def test_invalid_data_regions_type(self):
    ''' Invalid data_regions type. '''
    region = NodeRegion(dim=PhyDim2(2, 1), origin=PhyDim2(0, 0),
                        type=NodeRegion.DATA)
    # data_regions must be a tuple of NodeRegion: a list, a bare
    # NodeRegion, and a tuple containing a non-NodeRegion all fail.
    invalid_values = ([region],
                      region,
                      (region, PhyDim2(2, 1)))
    for data_regions in invalid_values:
        with self.assertRaisesRegexp(TypeError,
                                     'Resource: .*data_regions.*'):
            _ = Resource(
                proc_region=NodeRegion(dim=PhyDim2(2, 2),
                                       origin=PhyDim2(0, 0),
                                       type=NodeRegion.PROC),
                data_regions=data_regions,
                dim_array=PhyDim2(16, 16),
                size_gbuf=131072,
                size_regf=(512, ),
            )
def test_eyeriss_asplos17(self):
    ''' Reproduce TETRIS ASPLOS'17 paper Figure 8. '''
    network = self.alex_net
    batch_size = 16

    ## L-1 configuration.
    # Single node, 16x16 PE array, large 576 kB gbuf; buffer sizes are
    # in 16-bit words, hence the // 2.
    resource = Resource(
        proc_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                               type=NodeRegion.PROC),
        data_regions=(NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                                 type=NodeRegion.DATA), ),
        dim_array=PhyDim2(16, 16),
        size_gbuf=576056 // 2,  # 576 kB
        size_regf=1024 // 2,  # 1 kB
    )
    cost = Cost(mac_op=2e-12,
                mem_hier=(240e-12, 28e-12, 4e-12, 1e-12),  # pJ/16-b
                noc_hop=0,
                unit_static=320e-12)
    nnd = NNDataflow(network, batch_size, resource, cost,
                     self.map_strategy)
    tops, _ = nnd.schedule_search(self.options)
    self.assertTrue(tops)
    dfsch_l1 = tops[0]

    ## T-16 configuration.
    # 4x4 nodes, each a 14x14 PE array with a smaller 133 kB gbuf, and
    # cheaper 3D-stacked DRAM access.
    resource = Resource(
        proc_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(4, 4),
                               type=NodeRegion.PROC),
        data_regions=(NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(4, 4),
                                 type=NodeRegion.DATA), ),
        dim_array=PhyDim2(14, 14),
        size_gbuf=133032 // 2,  # 133 kB
        size_regf=512 // 2,  # 512 B
    )
    cost = Cost(mac_op=2e-12,
                mem_hier=(80e-12, 14e-12, 4e-12, 0.6e-12),  # pJ/16-b
                noc_hop=40e-12,
                unit_static=200e-12)
    options = Option(sw_gbuf_bypass=(True, True, True),
                     sw_solve_loopblocking=True,
                     partition_hybrid=True)
    nnd = NNDataflow(network, batch_size, resource, cost,
                     self.map_strategy)
    tops, _ = nnd.schedule_search(options)
    self.assertTrue(tops)
    dfsch_t16 = tops[0]

    ## Check results.

    # Same workload.
    self.assertAlmostEqual(dfsch_t16.total_ops, dfsch_l1.total_ops)
    # Performance of T-16 is proportional to PE resource (20% margin).
    self.assertLess(dfsch_t16.total_time,
                    1.2 * dfsch_l1.total_time * (16 * 16)
                    / (14 * 14 * 16))
    # Energy reduced by > 30%.
    self.assertLess(dfsch_t16.total_cost, dfsch_l1.total_cost * 0.7)
def test_eyeriss_isscc16(self):
    ''' Reproduce Eyeriss ISSCC'16 paper Fig. 14.5.6, JSSC'17 paper Table V. '''
    network = self.alex_net
    batch_size = 4
    # Single Eyeriss chip: one node, 12x14 PE array, 108 kB gbuf.
    # Buffer sizes are in 16-bit words, hence the // 2.
    resource = Resource(
        proc_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                               type=NodeRegion.PROC),
        data_regions=(NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                                 type=NodeRegion.DATA), ),
        dim_array=PhyDim2(12, 14),
        size_gbuf=108 * 1024 // 2,  # 108 kB
        size_regf=261,  # 225 + 12 + 24
    )
    cost = Cost(mac_op=2e-12,
                mem_hier=(460e-12, 15e-12, 4e-12, 1e-12),  # pJ/16-b
                noc_hop=0,
                unit_static=30e-3 / 200e6)  # 30 mW GBUF + REGF
    nnd = NNDataflow(network, batch_size, resource, cost,
                     self.map_strategy)
    tops, _ = nnd.schedule_search(self.options)
    self.assertTrue(tops)
    dfsch = tops[0]

    ## Check results.

    # Results as stats of the rows in the table.
    header = 'Power, Processing Latency, Ops, Active PEs, Filter size'
    stats = {}
    for layer in ['conv{}'.format(i) for i in range(1, 6)]:
        # Aggregate all partitions of a conv layer into one row.
        onchip_cost = 0
        time = 0
        ops = 0
        fil_size = 0
        for layer_part in network:
            if not layer_part or not layer_part.startswith(layer):
                continue
            sr = dfsch[layer_part]
            # On-chip cost excludes the DRAM access component.
            onchip_cost += sr.total_cost \
                    - sr.total_accesses[me.DRAM] * cost.mem_hier[me.DRAM]
            time += sr.total_time
            ops += sr.total_ops
            fil_size += network[layer_part].total_filter_size()
        # Power at a 200 MHz clock, reported in mW.
        power = onchip_cost / (time / 200e6) * 1e3  # mW
        active_pes = int(ops / time)
        stats[layer] = []
        stats[layer].append(power)
        stats[layer].append(time / 200.e3)  # cycles to ms
        stats[layer].append(ops / 1e6)  # to MOPs
        stats[layer].append(active_pes)
        stats[layer].append(fil_size / 1e3)  # to k

    # Check.
    stats_ref = {
        'conv1': [332, 16.5, 421.66, 151, 34.8],  # Act PE 154
        'conv2': [288, 39.2, 895.79, 135, 307.2],
        'conv3': [266, 21.8, 598.1, 156, 884.7],
        'conv4': [235, 16.0, 448.6, 156, 663.6],
        'conv5': [236, 10.0, 299.0, 156, 442.4],
    }
    for layer in stats:
        # Power and latency only need to be within a band of the paper's
        # numbers; ops, active PEs, and filter size must match closely.
        success = (0.6 * stats_ref[layer][0] < stats[layer][0]
                   < stats_ref[layer][0]) \
                and (0.8 * stats_ref[layer][1] < stats[layer][1]
                     < stats_ref[layer][1]) \
                and all(abs(a - b) < 0.1 for a, b
                        in zip(stats[layer][2:], stats_ref[layer][2:]))
        self.assertTrue(
            success,
            'test_eyeriss_isscc16: '
            'stats diff in layer {}.\n'
            'header: {}\n'
            'actual: {}\nref: {}'.format(layer, header, stats[layer],
                                         stats_ref[layer]))
def test_sub(self):
    ''' Operation sub. '''
    minuend = PhyDim2(14, 12)
    # Subtracting another PhyDim2 works element-wise.
    self.assertTupleEqual(minuend - PhyDim2(5, 3), (9, 9), 'sub')
    # Subtracting a scalar applies to both components.
    self.assertTupleEqual(minuend - 3, (11, 9), 'sub')
def test_invalid_frmap_type(self):
    ''' Invalid frmap type. '''
    # frmap must be a FmapRangeMap; a bare FmapRange is rejected.
    bad_frmap = FmapRange((0, ) * 4, (1, ) * 4)
    with self.assertRaisesRegexp(TypeError, 'DataLayout: .*frmap.*'):
        _ = DataLayout(origin=PhyDim2(2, 3), frmap=bad_frmap,
                       type=NodeRegion.DATA)
def test_neg(self):
    ''' Operation neg. '''
    # Unary negation applies to both components.
    cases = ((PhyDim2(14, 12), (-14, -12)),
             (PhyDim2(5, 3), (-5, -3)))
    for dim, expected in cases:
        self.assertTupleEqual(-dim, expected, 'neg')
def test_invalid_type(self):
    ''' Invalid type. '''
    # NodeRegion.NUM is a count sentinel, not a valid region type.
    bad_type = NodeRegion.NUM
    with self.assertRaisesRegexp(ValueError, 'DataLayout: .*type.*'):
        _ = DataLayout(origin=PhyDim2(2, 3), frmap=self.frm,
                       type=bad_type)
def test_hop_dist_error(self):
    ''' Get hop distance. '''
    # The other endpoint must be a PhyDim2; a plain tuple raises.
    src = PhyDim2(14, 12)
    with self.assertRaisesRegexp(TypeError, 'hop_dist'):
        _ = src.hop_dist((5, 20))
def test_is_in_region(self):
    ''' Whether is in region. '''
    # self.dly fits in a 2x2 region at (1, 1).
    nr1 = NodeRegion(dim=PhyDim2(2, 2), origin=PhyDim2(1, 1),
                     type=NodeRegion.DATA)
    self.assertTrue(self.dly.is_in_region(nr1))
    # A larger enclosing region also contains it.
    nr2 = NodeRegion(dim=PhyDim2(3, 3), origin=PhyDim2(0, 0),
                     type=NodeRegion.DATA)
    self.assertTrue(self.dly.is_in_region(nr2))
    # A 2x2 region at the origin misses part of the layout.
    nr3 = NodeRegion(dim=PhyDim2(2, 2), origin=PhyDim2(0, 0),
                     type=NodeRegion.DATA)
    self.assertFalse(self.dly.is_in_region(nr3))
    # Containment also requires a matching region type.
    nr4 = NodeRegion(dim=PhyDim2(3, 3), origin=PhyDim2(0, 0),
                     type=NodeRegion.PROC)
    self.assertFalse(self.dly.is_in_region(nr4))
    # Add a range mapped to farther nodes; the layout now needs a 4x4
    # region.
    frm = self.frm.copy()
    frm.add(FmapRange((0, 0, 0, 16), (4, 4, 16, 20)),
            (PhyDim2(2, 2), PhyDim2(3, 3)))
    dly = DataLayout(origin=PhyDim2(1, 1), frmap=frm,
                     type=NodeRegion.DATA)
    nr4 = NodeRegion(dim=PhyDim2(3, 3), origin=PhyDim2(1, 1),
                     type=NodeRegion.DATA)
    self.assertFalse(dly.is_in_region(nr4))
    nr5 = NodeRegion(dim=PhyDim2(4, 4), origin=PhyDim2(1, 1),
                     type=NodeRegion.DATA)
    self.assertTrue(dly.is_in_region(nr5))
    # Add a range mapped to even farther nodes ((5, 5) plus the (1, 1)
    # origin), which outgrows the 4x4 region but fits in 7x7 at (0, 0).
    frm.add(FmapRange((0, 0, 16, 0), (4, 4, 20, 20)),
            (PhyDim2(1, 1), PhyDim2(3, 3), PhyDim2(5, 5)))
    dly = DataLayout(origin=PhyDim2(1, 1), frmap=frm,
                     type=NodeRegion.DATA)
    self.assertFalse(dly.is_in_region(nr5))
    nr6 = NodeRegion(dim=PhyDim2(7, 7), origin=PhyDim2(0, 0),
                     type=NodeRegion.DATA)
    self.assertTrue(dly.is_in_region(nr6))
def test_small(self):
    ''' Small case with hand calculation. '''
    layer = ConvLayer(6, 8, 16, 3)
    assert self.batch_size == 8
    # Expected fmap ranges per processing node (hand-derived):
    # i (0, 0), (2, 0): (0, 0, 0, 0) -- (4, 6, 10, 10)
    #   (0, 1), (2, 1): (0, 0, 0, 8) -- (4, 6, 10, 18)
    #   (0, 2), (2, 2): (4, 0, 0, 0) -- (8, 6, 10, 10)
    #   (0, 3), (2, 3): (4, 0, 0, 8) -- (8, 6, 10, 18)
    #   (1, 0), (3, 0): (0, 0, 8, 0) -- (4, 6, 18, 10)
    #   (1, 1), (3, 1): (0, 0, 8, 8) -- (4, 6, 18, 18)
    #   (1, 2), (3, 2): (4, 0, 8, 0) -- (8, 6, 18, 10)
    #   (1, 3), (3, 3): (4, 0, 8, 8) -- (8, 6, 18, 18)
    # o (0, 0): (0, 0, 0, 0) -- (4, 4, 8, 8)
    #   (0, 1): (0, 0, 0, 8) -- (4, 4, 8, 16)
    #   (0, 2): (4, 0, 0, 0) -- (8, 4, 8, 8)
    #   (0, 3): (4, 0, 0, 8) -- (8, 4, 8, 16)
    #   (1, 0): (0, 0, 8, 0) -- (4, 4, 16, 8)
    #   (1, 1): (0, 0, 8, 8) -- (4, 4, 16, 16)
    #   (1, 2): (4, 0, 8, 0) -- (8, 4, 16, 8)
    #   (1, 3): (4, 0, 8, 8) -- (8, 4, 16, 16)
    #   (2, 0): (0, 4, 0, 0) -- (4, 8, 8, 8)
    #   (2, 1): (0, 4, 0, 8) -- (4, 8, 8, 16)
    #   (2, 2): (4, 4, 0, 0) -- (8, 8, 8, 8)
    #   (2, 3): (4, 4, 0, 8) -- (8, 8, 8, 16)
    #   (3, 0): (0, 4, 8, 0) -- (4, 8, 16, 8)
    #   (3, 1): (0, 4, 8, 8) -- (4, 8, 16, 16)
    #   (3, 2): (4, 4, 8, 0) -- (8, 8, 16, 8)
    #   (3, 3): (4, 4, 8, 8) -- (8, 8, 16, 16)
    part = PartitionScheme(order=(pe.BATP, pe.INPP, pe.OUTP, pe.OFMP),
                           pdims=((2, 1), (2, 2), (1, 2), (1, 1)))
    nr = NodeRegion(origin=PhyDim2(0, 0), dim=part.dim(),
                    type=NodeRegion.PROC)
    # Ifmap layout source nodes:
    # (0, 0, 0, 0) -- (4, 6, 18, 9): (-2, -2)
    # (0, 0, 0, 9) -- (4, 6, 18, 18): (-2, -1)
    # (4, 0, 0, 0) -- (8, 6, 18, 9): (-1, -2)
    # (4, 0, 0, 9) -- (8, 6, 18, 18): (-1, -1)
    ilayout = self._make_data_layout(layer.nifm, layer.hifm, layer.wifm,
                                     PhyDim2(-2, -2), (2, 1), (1, 1),
                                     PhyDim2(2, 2))
    # Ofmap layout destination nodes:
    # (0, 0, 0, 0) -- (8, 4, 16, 8): (2, 2)
    # (0, 0, 0, 8) -- (8, 4, 16, 16): (2, 3)
    # (0, 4, 0, 0) -- (8, 8, 16, 8): (3, 2)
    # (0, 4, 0, 8) -- (8, 8, 16, 16): (3, 3)
    olayout = self._make_data_layout(layer.nofm, layer.hofm, layer.wofm,
                                     PhyDim2(2, 2), (1, 1), (2, 1),
                                     PhyDim2(2, 2))
    filter_nodes = frozenset([PhyDim2(0, 0)])
    # Hand-calculated hop counts:
    # filter: (0, 0) -> all, 6 * 4 * 3 * 3
    # ifmap: (-2, -2) -> (0, 0), (2, 0): 4 * 6 * 10 * 9
    #                 -> (0, 1), (2, 1): 4 * 6 * 10 * (9 - 8)
    #                 -> (1, 0), (3, 0): 4 * 6 * (18 - 8) * 9
    #                 -> (1, 1), (3, 1): 4 * 6 * (18 - 8) * (9 - 8)
    #        (-2, -1) -> (0, 0), (2, 0): 4 * 6 * 10 * (10 - 9)
    #                 -> (0, 1), (2, 1): 4 * 6 * 10 * (18 - 9)
    #                 -> (1, 0), (3, 0): 4 * 6 * (18 - 8) * (10 - 9)
    #                 -> (1, 1), (3, 1): 4 * 6 * (18 - 8) * (18 - 9)
    #        (-1, -2) -> (0, 2), (2, 2): (8 - 4) * 6 * 10 * 9
    #                 -> (0, 3), (2, 3): (8 - 4) * 6 * 10 * (9 - 8)
    #                 -> (1, 2), (3, 2): (8 - 4) * 6 * (18 - 8) * 9
    #                 -> (1, 3), (3, 3): (8 - 4) * 6 * (18 - 8) * (9 - 8)
    #        (-1, -1) -> (0, 2), (2, 2): (8 - 4) * 6 * 10 * (10 - 9)
    #                 -> (0, 3), (2, 3): (8 - 4) * 6 * 10 * (18 - 9)
    #                 -> (1, 2), (3, 2): (8 - 4) * 6 * (18 - 8) * (10 - 9)
    #                 -> (1, 3), (3, 3): (8 - 4) * 6 * (18 - 8) * (18 - 9)
    # ofmap: (2, 2) -> (0, 0):
    #               -> (0, 2):
    #               -> (1, 0):
    #               -> (1, 2): 4 * 4 * 8 * 8
    #        (2, 3) -> (0/1, 1/3)
    #        (3, 2) -> (2/3, 0/2)
    #        (3, 3) -> (2/3, 1/3)
    nhops = partition.unit_nhops_to_proc_region(layer, self.batch_size,
                                                nr, part, filter_nodes,
                                                ilayout, olayout,
                                                self.options['BASE'])
    self.assertEqual(
        nhops[de.FIL],
        6 * 4 * 3 * 3 * sum(h + w for h in range(4) for w in range(4)))
    self.assertEqual(
        nhops[de.IFM],
        4 * 6 * 10 * ((4 + 6) * 9 + (5 + 7) * 1
                      + (5 + 7) * 9 + (6 + 8) * 1
                      + (3 + 5) * 1 + (4 + 6) * 9
                      + (4 + 6) * 1 + (5 + 7) * 9
                      + (5 + 7) * 9 + (6 + 8) * 1
                      + (6 + 8) * 9 + (7 + 9) * 1
                      + (4 + 6) * 1 + (5 + 7) * 9
                      + (5 + 7) * 1 + (6 + 8) * 9))
    self.assertEqual(
        nhops[de.OFM],
        4 * 4 * 8 * 8 * ((4 + 2 + 3 + 1) + (4 + 2 + 3 + 1)
                         + (3 + 1 + 2 + 0) + (3 + 1 + 2 + 0)))
def setUp(self):
    # A collection of synthetic networks exercising different graph
    # topologies, keyed by network name, plus the real model zoo.
    self.net = {}

    net = Network('net1')
    # Linear.
    net.set_input_layer(InputLayer(10, 1))
    net.add('0', FCLayer(10, 20))
    net.add('1', FCLayer(20, 30))
    net.add('1p', PoolingLayer(30, 1, 1))
    net.add('2', FCLayer(30, 40))
    net.add('3', FCLayer(40, 50))
    self.net[net.net_name] = net

    net = Network('net2')
    # Long linear.
    net.set_input_layer(InputLayer(1, 1))
    for idx in range(16):
        net.add(str(idx), FCLayer(1, 1))
    self.net[net.net_name] = net

    net = Network('net3')
    # Fork.
    # /0-2\   /6- 7- 8\
    # x      4-5          12
    # \1-3/   \9-10-11/
    net.set_input_layer(InputLayer(1, 1))
    net.add('0', FCLayer(1, 1), prevs=net.INPUT_LAYER_KEY)
    net.add('1', FCLayer(1, 1), prevs=net.INPUT_LAYER_KEY)
    net.add('2', FCLayer(2, 1), prevs=('0', '1'))
    net.add('2p', PoolingLayer(1, 1, 1))
    net.add('3', FCLayer(2, 1), prevs=('0', '1'))
    net.add('4', FCLayer(2, 1), prevs=('2p', '3'))
    net.add('5', FCLayer(1, 1))
    net.add('5p', PoolingLayer(1, 1, 1))
    net.add('6', FCLayer(1, 1), prevs='5p')
    net.add('7', FCLayer(1, 1))
    net.add('8', FCLayer(1, 1))
    net.add('9', FCLayer(1, 1), prevs='5p')
    net.add('10', FCLayer(1, 1))
    net.add('11', FCLayer(1, 1))
    net.add('12', FCLayer(2, 1), prevs=('8', '11'))
    self.net[net.net_name] = net

    net = Network('net4')
    # Complex fork.
    #          /5       \
    # 0-1-2-3-4-6-7-8-10-14
    #          \9/
    #          \11-12   /
    #          \13      /
    net.set_input_layer(InputLayer(1, 1))
    net.add('0', FCLayer(1, 1))
    net.add('1', FCLayer(1, 1))
    net.add('2', FCLayer(1, 1))
    net.add('3', FCLayer(1, 1))
    net.add('4', FCLayer(1, 1))
    net.add('5', FCLayer(1, 1), prevs='4')
    net.add('6', FCLayer(1, 1), prevs='4')
    net.add('7', FCLayer(1, 1))
    net.add('8', FCLayer(1, 1), prevs='7')
    net.add('9', FCLayer(1, 1), prevs='7')
    net.add('10', FCLayer(1, 1))
    net.add('10p', PoolingLayer(2, 1, 1), prevs=('8', '10'))
    net.add('11', PoolingLayer(1, 1, 1), prevs='4')
    net.add('12', FCLayer(1, 1))
    net.add('13', PoolingLayer(1, 1, 1), prevs='4')
    net.add('14', FCLayer(5, 1), prevs=('5', '10p', '12', '13'))
    self.net[net.net_name] = net

    net = Network('net5')
    # Corner cases.
    #  ----\
    # //1-2\ 7-8\
    # 0-3-4-x   10-11-12
    # \ \5/  9 /  \__/
    #  6--/
    net.set_input_layer(InputLayer(1, 1))
    net.add('0', FCLayer(1, 1))
    net.add('1', FCLayer(1, 1), prevs='0')
    net.add('2', FCLayer(1, 1))
    net.add('3', FCLayer(1, 1), prevs='0')
    net.add('4', FCLayer(1, 1), prevs='3')
    net.add('5', FCLayer(1, 1), prevs='3')
    net.add('6', FCLayer(1, 1), prevs='0')
    net.add('7', FCLayer(5, 1), prevs=('0', '2', '4', '5', '6'))
    net.add('8', FCLayer(1, 1))
    net.add('9', FCLayer(5, 1), prevs=('0', '2', '4', '5', '6'))
    net.add('10', FCLayer(2, 1), prevs=('8', '9'))
    net.add('11', FCLayer(1, 1))
    net.add('12', FCLayer(2, 1), prevs=('10', '11'))
    self.net[net.net_name] = net

    net = Network('net6')
    # Fmap sizes.
    net.set_input_layer(InputLayer(1, 24))
    net.add('0', ConvLayer(1, 1, 24, 3))
    net.add('1', ConvLayer(1, 1, 12, 3, strd=2))
    net.add('1p', PoolingLayer(1, 6, 2))
    net.add('2', ConvLayer(1, 1, 6, 3))
    net.add('3', ConvLayer(1, 1, 6, 3, strd=4), prevs=('0'))
    self.net[net.net_name] = net

    net = Network('net7')
    # Topological order: see a visited vertex again.
    # /---
    # 0-1-\\
    # \2--2p
    net.set_input_layer(InputLayer(1, 1))
    net.add('0', FCLayer(1, 1))
    net.add('1', FCLayer(1, 1), prevs='0')
    net.add('2', FCLayer(1, 1), prevs='0')
    net.add('2p', PoolingLayer(3, 1, 1), prevs=('0', '1', '2'))
    self.net[net.net_name] = net

    net = Network('net8')
    # Forward to the middle.
    #    /-\
    # 0-1-2-2p-4-4p
    # \-3------/
    net.set_input_layer(InputLayer(1, 1))
    net.add('0', FCLayer(1, 1))
    net.add('1', FCLayer(1, 1), prevs='0')
    net.add('2', FCLayer(1, 1), prevs='1')
    net.add('2p', PoolingLayer(2, 1, 1), prevs=('1', '2'))
    net.add('3', FCLayer(1, 1), prevs='0')
    net.add('4', FCLayer(2, 1), prevs='2p')
    net.add('4p', PoolingLayer(2, 1, 1), prevs=('3', '4'))
    self.net[net.net_name] = net

    net = Network('net9')
    # Previous layers include input and others.
    net.set_input_layer(InputLayer(1, 1))
    net.add('0', FCLayer(1, 1))
    net.add('1', FCLayer(2, 1), prevs=(net.INPUT_LAYER_KEY, '0'))
    self.net[net.net_name] = net

    # Real networks.
    for net_name in all_networks():
        self.net[net_name] = import_network(net_name)

    self.batch_size = 16
    # 8x8 PROC node region; DRAM src/dst split into the left/right
    # halves.
    self.resource = Resource(
        proc_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(8, 8),
                               type=NodeRegion.PROC),
        dram_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(8, 8),
                               type=NodeRegion.DRAM),
        src_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(8, 4),
                                   type=NodeRegion.DRAM),
        dst_data_region=NodeRegion(origin=PhyDim2(0, 4),
                                   dim=PhyDim2(8, 4),
                                   type=NodeRegion.DRAM),
        dim_array=PhyDim2(16, 16),
        size_gbuf=65536,
        size_regf=64,
        array_bus_width=float('inf'),
        dram_bandwidth=float('inf'),
        no_time_mux=False)

    part = PartitionScheme(order=range(pe.NUM), pdims=[(1, 1)] * pe.NUM)
    self.ofmap_layout = DataLayout(
        frngs=(FmapRange((0, 0, 0, 0), (2, 4, 16, 16)), ),
        regions=(NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                            type=NodeRegion.DRAM), ),
        parts=(part, ))
def _make_region(dim):
    ''' Build a DRAM NodeRegion with the given dimensions at the origin. '''
    height, width = dim
    return NodeRegion(type=NodeRegion.DRAM,
                      origin=PhyDim2(0, 0),
                      dim=PhyDim2(height, width))
def test_valid_args(self):
    ''' Valid arguments. '''
    pdim = PhyDim2(14, 12)
    # Check both named fields against the constructor arguments.
    for attr_name, expected in (('h', 14), ('w', 12)):
        self.assertEqual(getattr(pdim, attr_name), expected, attr_name)
def eyerissAsplos17(self):
    '''
    Reproduce TETRIS ASPLOS'17 paper Figure 8.

    Runs the T-16 configuration (4x4 nodes, 14x14 PE arrays) through the
    dataflow scheduler and prints the total ops/time/energy plus a
    per-layer cost breakdown. Not named test_* so it is not collected by
    default; the cross-config assertions remain commented out.
    '''
    network = self.alex_net
    batch_size = 16

    resource = Resource(
        proc_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(4, 4),
                               type=NodeRegion.PROC),
        dram_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(4, 4),
                               type=NodeRegion.DRAM),
        src_data_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(4, 4),
                                   type=NodeRegion.DRAM),
        dst_data_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(4, 4),
                                   type=NodeRegion.DRAM),
        dim_array=PhyDim2(14, 14),
        size_gbuf=133032 // 2,  # 133 kB
        size_regf=512 // 2,  # 512 B
        array_bus_width=float('inf'),
        dram_bandwidth=float('inf'),
        no_time_mux=False,
    )

    cost = Cost(mac_op=2e-12,
                mem_hier=(80e-12, 14e-12, 4e-12, 0.6e-12),  # pJ/16-b
                noc_hop=40e-12,
                idl_unit=200e-12)

    options = Option(sw_gbuf_bypass=(True, True, True),
                     sw_solve_loopblocking=True,
                     partition_hybrid=True)

    # BUGFIX: removed a leftover `pdb.set_trace()` debugger breakpoint here;
    # it blocked any non-interactive invocation of this method.
    nnd = NNDataflow(network, batch_size, resource, cost,
                     self.map_strategy)
    tops, _ = nnd.schedule_search(options)
    self.assertTrue(tops)
    dfsch_t16 = tops[0]

    ## Check results.

    # Same workload.
    #self.assertAlmostEqual(dfsch_t16.total_ops, dfsch_l1.total_ops)
    print('t16 ops: {}'.format(dfsch_t16.total_ops))
    # Performance of T-16 is proportional to PE resource (20% margin).
    #self.assertLess(dfsch_t16.total_time,
    #                1.2 * dfsch_l1.total_time * (16 * 16) / (14 * 14 * 16))
    print('t16_time: {}'.format(dfsch_t16.total_time))
    # Energy reduced by > 30%.
    # self.assertLess(dfsch_t16.total_cost, dfsch_l1.total_cost * 0.7)
    # With dimension restriction on partitioning, this is slightly violated.
    #self.assertLess(dfsch_t16.total_cost, dfsch_l1.total_cost * 0.72)
    print('t16_energy: {}'.format(dfsch_t16.total_cost))
    for i in dfsch_t16:
        print(str(i) + ',')

    ## Check results.
    # Results as cost for each component:
    header = 'ALU, DRAM, Buffer, Array, RF'

    cost_bkdn = {}

    for layer in dfsch_t16:
        layer = str(layer)
        op_cost = 0
        access_cost = [0] * me.NUM
        # Sum over all partitions of this layer.
        for layer_part in network:
            if not layer_part or not layer_part.startswith(layer):
                continue
            sr = dfsch_t16[layer_part]
            op_cost += sr.total_ops * cost.mac_op
            access_cost = [ac + a * c for ac, a, c
                           in zip(access_cost, sr.total_accesses,
                                  cost.mem_hier)]
        cost_bkdn[layer] = []
        # To 1e9.
        cost_bkdn[layer].append(op_cost * 1e12 / 1e9)
        cost_bkdn[layer].append(access_cost[me.DRAM] * 1e12 / 1e9)
        cost_bkdn[layer].append(access_cost[me.GBUF] * 1e12 / 1e9)
        cost_bkdn[layer].append(access_cost[me.ITCN] * 1e12 / 1e9)
        cost_bkdn[layer].append(access_cost[me.REGF] * 1e12 / 1e9)

    # Column order matches `header` above.
    for layer in cost_bkdn:
        print(cost_bkdn[layer])
def test_size(self):
    ''' Get size. '''
    pdim = PhyDim2(14, 12)
    expected = 14 * 12
    self.assertEqual(pdim.size(), expected, 'size')
def test_invalid_type(self):
    ''' Invalid type. '''
    # BUGFIX: assertRaisesRegexp is a deprecated alias removed in
    # Python 3.12; use assertRaisesRegex, consistent with the other
    # tests in this file.
    with self.assertRaisesRegex(ValueError, 'NodeRegion: .*type.*'):
        _ = NodeRegion(dim=PhyDim2(4, 4), origin=PhyDim2(1, 3),
                       type=NodeRegion.NUM)
def test_add(self):
    ''' Operation add. '''
    base = PhyDim2(14, 12)
    offset = PhyDim2(5, 3)
    # Element-wise add of two dims, and broadcast add of a scalar.
    self.assertTupleEqual(base + offset, (19, 15), 'add')
    self.assertTupleEqual(base + 3, (17, 15), 'add')
def setUp(self):
    '''
    Set up the shared fixtures for the loop-blocking tests: workload
    layers, resource variants, nested-loop descriptions, partition and
    buffer-sharing schemes, options, constraints, and costs.
    '''
    # Workload.
    self.layer = {}
    self.layer['BASE'] = ConvLayer(12, 10, 28, 3)
    # 'LGFIL': large-filter layer — presumably filter size 20; confirm
    # against ConvLayer argument order.
    self.layer['LGFIL'] = ConvLayer(2, 4, 28, 20)
    self.layer['POOL'] = PoolingLayer(32, 28, 2)
    self.layer['PAR'] = ConvLayer(24, 36, 56, 3)
    self.batch_size = 4

    # Resource.
    self.resource = {}
    dim_array = PhyDim2(16, 16)
    # Single-node process and data regions shared by most variants.
    proc_region = NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                             type=NodeRegion.PROC)
    data_region = NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                             type=NodeRegion.DRAM)
    # Typical resource.
    self.resource['BASE'] = Resource(proc_region=proc_region,
                                     dram_region=data_region,
                                     src_data_region=data_region,
                                     dst_data_region=data_region,
                                     dim_array=dim_array,
                                     size_gbuf=65536,
                                     size_regf=64,
                                     array_bus_width=float('inf'),
                                     dram_bandwidth=float('inf'),
                                     no_time_mux=False)
    # Larger resource with sufficient capacity, to make all schemes valid.
    self.resource['LG'] = Resource(proc_region=proc_region,
                                   dram_region=data_region,
                                   src_data_region=data_region,
                                   dst_data_region=data_region,
                                   dim_array=dim_array,
                                   size_gbuf=1024**3,
                                   size_regf=1024**3,
                                   array_bus_width=float('inf'),
                                   dram_bandwidth=float('inf'),
                                   no_time_mux=False)
    # Small resource.
    self.resource['SM'] = Resource(proc_region=proc_region,
                                   dram_region=data_region,
                                   src_data_region=data_region,
                                   dst_data_region=data_region,
                                   dim_array=dim_array,
                                   size_gbuf=4096,
                                   size_regf=16,
                                   array_bus_width=float('inf'),
                                   dram_bandwidth=float('inf'),
                                   no_time_mux=False)
    # Multi-node parallel resource.
    self.resource['PAR'] = Resource(proc_region=NodeRegion(
        origin=PhyDim2(0, 0), dim=PhyDim2(4, 2), type=NodeRegion.PROC),
                                    dram_region=data_region,
                                    src_data_region=data_region,
                                    dst_data_region=data_region,
                                    dim_array=dim_array,
                                    size_gbuf=25000,
                                    size_regf=64,
                                    array_bus_width=float('inf'),
                                    dram_bandwidth=float('inf'),
                                    no_time_mux=False)
    # Resource with no data regions.
    # Source/destination regions of PROC type, i.e. not DRAM-backed.
    proc_data_region = NodeRegion(origin=PhyDim2(1, 1), dim=PhyDim2(1, 1),
                                  type=NodeRegion.PROC)
    self.resource['SRCNOTDATA'] = Resource(
        proc_region=proc_region, dram_region=data_region,
        src_data_region=proc_data_region, dst_data_region=data_region,
        dim_array=dim_array, size_gbuf=1024**3, size_regf=1024**3,
        array_bus_width=float('inf'), dram_bandwidth=float('inf'),
        no_time_mux=False)
    self.resource['DSTNOTDATA'] = Resource(
        proc_region=proc_region, dram_region=data_region,
        src_data_region=data_region, dst_data_region=proc_data_region,
        dim_array=dim_array, size_gbuf=1024**3, size_regf=1024**3,
        array_bus_width=float('inf'), dram_bandwidth=float('inf'),
        no_time_mux=False)
    # Data kept local: src/dst regions coincide with the process region.
    self.resource['DATALOCAL'] = Resource(
        proc_region=proc_region, dram_region=data_region,
        src_data_region=proc_region, dst_data_region=proc_region,
        dim_array=dim_array, size_gbuf=1024**3, size_regf=1024**3,
        array_bus_width=float('inf'), dram_bandwidth=float('inf'),
        no_time_mux=False)
    # Filter pinning.
    self.resource['FILPIN'] = Resource(proc_region=proc_region,
                                       dram_region=data_region,
                                       src_data_region=data_region,
                                       dst_data_region=data_region,
                                       dim_array=dim_array,
                                       size_gbuf=1024**3,
                                       size_regf=1024**3,
                                       array_bus_width=float('inf'),
                                       dram_bandwidth=float('inf'),
                                       no_time_mux=True)

    # Nested loop description after mapping.
    self.nld = {}
    self.nld['BASE'] = next(
        MapStrategyEyeriss(self.layer['BASE'], self.batch_size, 1,
                           dim_array).gen_nested_loop_desc())
    self.nld['LGFIL'] = next(
        MapStrategyEyeriss(self.layer['LGFIL'], self.batch_size, 1,
                           dim_array).gen_nested_loop_desc())
    self.nld['POOL'] = next(
        MapStrategyEyeriss(self.layer['POOL'], self.batch_size, 1,
                           dim_array).gen_nested_loop_desc())
    # Fake nested loop, with zero filter size.
    self.nld['ZERO_FIL'] = NestedLoopDesc(
        loopcnt=(12, 10, 4), usize_gbuf=(0, 1000, 800),
        usize_regf=(0, 3, 1),
        unit_access=((0, 1000, 800), (0, 1000, 800), (3, 9, 7),
                     (1, 1, 1)),
        data_loops=(DataDimLoops(le.IFM, le.OFM),
                    DataDimLoops(le.IFM, le.BAT),
                    DataDimLoops(le.OFM, le.BAT)),
        unit_ops=1, unit_time=1)
    # Fake nested loop, with zero ifmap size.
    self.nld['ZERO_IFM'] = NestedLoopDesc(
        loopcnt=(12, 10, 4), usize_gbuf=(9, 0, 800), usize_regf=(3, 0, 1),
        unit_access=((9, 0, 800), (9, 0, 800), (3, 9, 7), (1, 1, 1)),
        data_loops=(DataDimLoops(le.IFM, le.OFM),
                    DataDimLoops(le.IFM, le.BAT),
                    DataDimLoops(le.OFM, le.BAT)),
        unit_ops=1, unit_time=1)

    # Fake partition scheme.
    self.part = PartitionScheme(range(pe.NUM), ((1, 1), ) * pe.NUM)
    # Fake buffer sharing scheme.
    self.bufshr = BufShrScheme(proc_region, self.part)

    # Options.
    self.options = {}
    # Basic.
    self.options['BASE'] = Option(ntops=2**30)
    # Multiprocessing.
    self.options['MP'] = Option(ntops=2**30, nprocesses=8)
    # Limited top schemes.
    self.options['NTOPS'] = Option(ntops=10)
    # Bypass.
    self.options['BYP'] = Option(sw_gbuf_bypass=(True, ) * 3, ntops=2**30)
    # Bypass solver.
    self.options['BYPSOL'] = Option(sw_gbuf_bypass=(True, ) * 3,
                                    sw_solve_loopblocking=True,
                                    ntops=2**30)
    # Access forwarding.
    self.options['ACCFWD'] = Option(hw_access_forwarding=True, ntops=2**30)
    # Buffer sharing.
    self.options['BUFSHR'] = Option(hw_gbuf_sharing=True, ntops=2**30)
    # Buffer sharing with bypassing.
    self.options['BUFSHR-BYP'] = Option(sw_gbuf_bypass=(True, ) * 3,
                                        hw_gbuf_sharing=True,
                                        ntops=2**30)

    # Constraint.
    self.none_cstr = SchedulingConstraint()
    self.cstr = SchedulingConstraint(topifm=1, topbat=1)

    # Cost.
    # mem_hier is ordered outermost-to-innermost — presumably
    # (DRAM, GBUF, ITCN, REGF), matching the me.* indices used in this
    # file's other tests; confirm against the Cost definition.
    self.cost = Cost(mac_op=1, mem_hier=(200, 6, 2, 1),
                     noc_hop=50, idl_unit=50)
def test_concat_invalid_type(self):
    ''' Concat invalid type. '''
    # Both a FmapRange and a PhyDim2 are rejected as concat operands.
    for bad_operand in (self.frng1, PhyDim2(1, 3)):
        with self.assertRaisesRegex(TypeError, 'DataLayout: .*concat.*'):
            _ = DataLayout.concat(self.dl1, bad_operand)