def setUp(self):
    '''
    Prepare the fixtures: two imported networks, the mapping strategy, a
    resource with one processing region and one data region, the cost, and
    default options.
    '''
    # Networks.
    self.alex_net = import_network('alex_net')
    self.vgg_net = import_network('vgg_net')

    self.map_strategy = MapStrategyEyeriss

    # Both regions are 1x1 and sit at the origin.
    region_proc = NodeRegion(origin=PhyDim2(0, 0),
                             dim=PhyDim2(1, 1),
                             type=NodeRegion.PROC)
    region_data = NodeRegion(origin=PhyDim2(0, 0),
                             dim=PhyDim2(1, 1),
                             type=NodeRegion.DATA)

    self.resource = Resource(
        proc_region=region_proc,
        data_regions=(region_data, ),
        dim_array=PhyDim2(16, 16),
        # 128 kB; the `// 2` presumably converts bytes to 2-byte words.
        size_gbuf=128 * 1024 // 2,
        # 512 B, same conversion.
        size_regf=512 // 2,
    )

    self.cost = Cost(mac_op=1,
                     mem_hier=(200, 6, 2, 1),
                     noc_hop=0,
                     unit_static=0)

    self.options = Option()
def setUp(self):
    '''
    Prepare the fixtures: two imported networks, two small hand-built
    networks ('simple' and 'complex'), the mapping strategy, a resource whose
    regions are all 1x1 at the origin, the cost, and default options.
    '''

    def unit_region(kind):
        ''' A fresh 1x1 region at the origin of the given type. '''
        return NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1), type=kind)

    # Imported networks.
    self.alex_net = import_network('alex_net')
    self.vgg_net = import_network('vgg_net')

    # Hand-built linear network.
    simple = Network('simple')
    simple.set_input_layer(InputLayer(4, 2))
    simple.add('1', ConvLayer(4, 4, 2, 1))
    simple.add('2', ConvLayer(4, 4, 2, 1))
    # Two more layers to avoid single-segment case.
    simple.add('a1', ConvLayer(4, 1, 1, 1, strd=2))
    simple.add('a2', ConvLayer(1, 1, 1, 1))
    self.simple_net = simple

    # Hand-built network: layer '1' feeds two branches ('2a'-'3a' and
    # '2b'-'3b') which are both consumed by layer '4'.
    forked = Network('complex')
    forked.set_input_layer(InputLayer(8, 8))
    forked.add('1', ConvLayer(8, 8, 8, 1))
    forked.add('2a', ConvLayer(8, 8, 8, 1), prevs=('1', ))
    forked.add('3a', ConvLayer(8, 8, 8, 1))
    forked.add('2b', ConvLayer(8, 8, 8, 1), prevs=('1', ))
    forked.add('3b', ConvLayer(8, 8, 8, 1))
    forked.add('4', ConvLayer(16, 8, 8, 1), prevs=('3a', '3b'))
    self.complex_net = forked

    self.map_strategy = MapStrategyEyeriss

    self.resource = Resource(
        proc_region=unit_region(NodeRegion.PROC),
        dram_region=unit_region(NodeRegion.DRAM),
        src_data_region=unit_region(NodeRegion.DRAM),
        dst_data_region=unit_region(NodeRegion.DRAM),
        dim_array=PhyDim2(16, 16),
        # 128 kB; the `// 2` presumably converts bytes to 2-byte words.
        size_gbuf=128 * 1024 // 2,
        # 512 B, same conversion.
        size_regf=512 // 2,
        array_bus_width=float('inf'),
        dram_bandwidth=float('inf'),
        no_time_mux=False,
    )

    self.cost = Cost(mac_op=1,
                     mem_hier=(200, 6, 2, 1),
                     noc_hop=0,
                     idl_unit=0)

    self.options = Option()
def __init__(self):
    '''
    Load the 'alex_net' and 'mock_net' networks and set up the mapping
    strategy, the cost and default options.
    '''
    self.alex_net = import_network('alex_net')
    self.mock_net = import_network('mock_net')

    self.map_strategy = MapStrategyEyeriss

    # The same four values are used for both `mem_hier` and `mem_cycles`.
    per_level = (200, 6, 2, 1)
    self.cost = Cost(
        value_control=1,
        value_mult={},
        adder_cost=1,
        mac_op=1,
        mem_hier=per_level,
        noc_hop=0,
        idl_unit=0,
        my_weights={},
        mem_cycles=per_level,
    )

    self.options = Option()
def __init__(self):
    '''
    Load the 'alex_net' network, set up the mapping strategy, the cost and
    default options, then print the three of them.
    '''
    self.alex_net = import_network('alex_net')

    self.map_strategy = MapStrategyEyeriss

    self.cost = Cost(
        value_control={},
        value_mult={},
        mac_op=1,
        mem_hier=(200, 6, 2, 1),
        noc_hop=0,
        idl_unit=0,
        my_weights={},
    )

    self.options = Option()

    # Report the configuration that was just built.
    report = (
        ('mapping is : {}', self.map_strategy),
        ('cost is: {}', self.cost),
        ('options are: {}', self.options),
    )
    for template, value in report:
        print(template.format(value))
def do_scheduling(args):
    '''
    Get optimal scheduling for given problem. Return a result schedule.

    `args` is the namespace of command-line options read below (network,
    batch, node/array dimensions, buffer sizes, costs, option switches).

    Returns an OrderedDict holding the version, the inputs, the resource,
    cost and options used, the cache stats, the elapsed search time, and the
    stats of the best schedule. Returns None, after writing a message to
    stderr, if the search finds no valid dataflow.

    Raises ValueError if `args.mem_type` is neither '2D' nor '3D'.
    '''

    ## Network.

    network = import_network(args.net)
    batch_size = args.batch

    ## Resource.

    dim_nodes = PhyDim2(*args.nodes)
    dim_array = PhyDim2(*args.array)

    # Sizes of gbuf and regf are in words.
    # Floor division is spelled out so the byte count per word and the
    # buffer sizes stay whole numbers under true division as well.
    word = (args.word + 7) // 8
    size_gbuf = args.gbuf // word
    size_regf = args.regf // word

    array_bus_width = args.bus_width // args.word
    if not array_bus_width:
        # A zero width is treated as an unlimited bus.
        array_bus_width = float('inf')
    dram_bandwidth = args.dram_bw / word

    proc_region = NodeRegion(dim=dim_nodes,
                             origin=PhyDim2(0, 0),
                             type=NodeRegion.PROC)

    if args.mem_type == '2D':
        # Memory nodes are on two sides.
        data_region = NodeRegion(dim=PhyDim2(2, 2),
                                 origin=PhyDim2(0, 0),
                                 dist=dim_nodes - PhyDim2(1, 1),
                                 type=NodeRegion.DRAM)
        assert data_region.rel2abs(PhyDim2(1, 1)) + PhyDim2(1, 1) \
                == proc_region.dim
    elif args.mem_type == '3D':
        # Memory nodes are on the top.
        data_region = NodeRegion(dim=dim_nodes,
                                 origin=PhyDim2(0, 0),
                                 type=NodeRegion.DRAM)
    else:
        # Fail with a clear message instead of an unbound-variable error.
        raise ValueError('Unrecognized memory type: {!r}; expected 2D or 3D.'
                         .format(args.mem_type))

    resource = Resource(proc_region=proc_region,
                        dram_region=data_region,
                        src_data_region=data_region,
                        dst_data_region=data_region,
                        dim_array=dim_array,
                        size_gbuf=size_gbuf,
                        size_regf=size_regf,
                        array_bus_width=array_bus_width,
                        dram_bandwidth=dram_bandwidth,
                        no_time_mux=False)

    ## Cost.

    hier_cost = [0] * me.NUM
    hier_cost[me.DRAM] = args.hier_cost[0]
    hier_cost[me.GBUF] = args.hier_cost[1]
    hier_cost[me.ITCN] = args.hier_cost[2]
    hier_cost[me.REGF] = args.hier_cost[3]
    cost = Cost(mac_op=args.op_cost,
                mem_hier=tuple(hier_cost),
                noc_hop=args.hop_cost,
                idl_unit=args.unit_idle_cost)

    ## Options.

    # Bypass stays enabled for a data category unless its letter appears in
    # `args.disable_bypass`.
    bypass = [True] * de.NUM
    bypass[de.IFM] = 'i' not in args.disable_bypass
    bypass[de.OFM] = 'o' not in args.disable_bypass
    bypass[de.FIL] = 'f' not in args.disable_bypass
    options = Option(
        sw_gbuf_bypass=tuple(bypass),
        sw_solve_loopblocking=args.solve_loopblocking,
        hw_access_forwarding=args.enable_access_forwarding,
        hw_gbuf_sharing=args.enable_gbuf_sharing,
        hw_gbuf_save_writeback=args.enable_save_writeback,
        partition_hybrid=args.hybrid_partition,
        partition_batch=args.batch_partition,
        partition_ifmaps=args.ifmaps_partition,
        partition_interlayer=args.interlayer_partition,
        layer_pipeline_time_ovhd=args.layer_pipeline_time_overhead,
        layer_pipeline_max_degree=args.layer_pipeline_max_degree,
        layer_pipeline_opt=not args.disable_interlayer_opt,
        opt_goal=args.goal.lower(),
        ntops=args.top,
        nprocesses=args.processes,
        verbose=args.verbose)

    ## Search schedules.

    nnd = NNDataflow(network, batch_size, resource, cost, MapStrategyEyeriss)
    tbeg = time.time()
    tops, cache_stats = nnd.schedule_search(options)
    tend = time.time()
    telapsed = tend - tbeg

    if not tops:
        sys.stderr.write('No valid dataflow found.\n')
        return None

    top = tops[0]

    ## Write results.

    res_map = OrderedDict()

    res_map['version'] = get_version(with_local=True)

    res_map['net'] = args.net
    res_map['batch'] = args.batch

    res_map['resource'] = resource._asdict()
    res_map['cost'] = cost._asdict()
    res_map['options'] = options._asdict()

    res_map['cache_stats'] = cache_stats
    res_map['elapsed'] = telapsed

    # Stats of the best schedule go after the fixed entries.
    stats = stats_dict(top, cost)
    for key, val in stats.items():
        res_map[key] = val

    return res_map
def do_scheduling(args):
    '''
    Get optimal scheduling for given problem. Return a result schedule.

    `args` is the namespace of command-line options read below (network,
    batch, node/array dimensions, buffer sizes, costs, option switches).

    Returns an OrderedDict holding the version, the inputs, the resource,
    cost and options used, the cache stats, and the stats of the best
    schedule. Returns None, after writing a message to stderr, if the search
    finds no valid dataflow.

    Raises ValueError if `args.mem_type` is neither '2D' nor '3D'.
    '''

    ## Network.

    network = import_network(args.net)
    batch_size = args.batch

    ## Resource.

    dim_nodes = PhyDim2(*args.nodes)
    dim_array = PhyDim2(*args.array)

    # Sizes of gbuf and regf are in words.
    # Floor division is spelled out so the byte count per word and the
    # buffer sizes stay whole numbers under true division as well.
    word = (args.word + 7) // 8
    size_gbuf = args.gbuf // word
    size_regf = args.regf // word

    proc_region = NodeRegion(dim=dim_nodes,
                             origin=PhyDim2(0, 0),
                             type=NodeRegion.PROC)

    if args.mem_type == '2D':
        # Memory nodes are on two sides.
        data_regions = (NodeRegion(dim=PhyDim2(h=dim_nodes.h, w=1),
                                   origin=PhyDim2(h=0, w=0),
                                   type=NodeRegion.DATA),
                        NodeRegion(dim=PhyDim2(h=dim_nodes.h, w=1),
                                   origin=PhyDim2(h=0, w=dim_nodes.w - 1),
                                   type=NodeRegion.DATA))
    elif args.mem_type == '3D':
        # All nodes have memory.
        data_regions = (NodeRegion(dim=dim_nodes,
                                   origin=PhyDim2(0, 0),
                                   type=NodeRegion.DATA), )
    else:
        # Fail with a clear message instead of an unbound-variable error.
        raise ValueError('Unrecognized memory type: {!r}; expected 2D or 3D.'
                         .format(args.mem_type))

    resource = Resource(proc_region=proc_region,
                        data_regions=data_regions,
                        dim_array=dim_array,
                        size_gbuf=size_gbuf,
                        size_regf=size_regf)

    ## Cost.

    hier_cost = [0] * me.NUM
    hier_cost[me.DRAM] = args.hier_cost[0]
    hier_cost[me.GBUF] = args.hier_cost[1]
    hier_cost[me.ITCN] = args.hier_cost[2]
    hier_cost[me.REGF] = args.hier_cost[3]
    cost = Cost(mac_op=args.op_cost,
                mem_hier=tuple(hier_cost),
                noc_hop=args.hop_cost,
                unit_static=args.unit_static_cost)

    ## Options.

    # Bypass stays enabled for a data category unless its letter appears in
    # `args.disable_bypass`.
    bypass = [True] * de.NUM
    bypass[de.IFM] = 'i' not in args.disable_bypass
    bypass[de.OFM] = 'o' not in args.disable_bypass
    bypass[de.FIL] = 'f' not in args.disable_bypass
    options = Option(sw_gbuf_bypass=tuple(bypass),
                     sw_solve_loopblocking=args.solve_loopblocking,
                     partition_hybrid=args.hybrid_partition,
                     partition_batch=args.batch_partition,
                     partition_ifmaps=args.ifmaps_partition,
                     ntops=args.top,
                     nprocesses=args.processes,
                     verbose=args.verbose)

    ## Search schedules.

    nnd = NNDataflow(network, batch_size, resource, cost, MapStrategyEyeriss)
    tops, cache_stats = nnd.schedule_search(options)

    if not tops:
        sys.stderr.write('No valid dataflow found.\n')
        return None

    top = tops[0]

    ## Write results.

    res_map = OrderedDict()

    res_map['version'] = get_version(with_local=True)

    res_map['net'] = args.net
    res_map['batch'] = args.batch

    res_map['resource'] = resource._asdict()
    res_map['cost'] = cost._asdict()
    res_map['options'] = options._asdict()

    res_map['cache_stats'] = cache_stats

    # Stats of the best schedule go after the fixed entries.
    stats = stats_dict(top, cost)
    for key, val in stats.items():
        res_map[key] = val

    return res_map
def setUp(self):
    '''
    Prepare the fixtures: a dict of hand-built networks ('net1' to 'net9')
    plus every network returned by all_networks(), the batch size, the
    resource, and an ofmap data layout.
    '''
    self.net = {}

    def register(network):
        ''' File a finished network in `self.net` under its own name. '''
        self.net[network.net_name] = network

    # Linear.
    linear = Network('net1')
    linear.set_input_layer(InputLayer(10, 1))
    linear.add('0', FCLayer(10, 20))
    linear.add('1', FCLayer(20, 30))
    linear.add('1p', PoolingLayer(30, 1, 1))
    linear.add('2', FCLayer(30, 40))
    linear.add('3', FCLayer(40, 50))
    register(linear)

    # Long linear.
    long_linear = Network('net2')
    long_linear.set_input_layer(InputLayer(1, 1))
    for layer_name in map(str, range(16)):
        long_linear.add(layer_name, FCLayer(1, 1))
    register(long_linear)

    # Fork.
    #   /0-2\   /6- 7- 8\
    #  x    4-5         12
    #   \1-3/   \9-10-11/
    fork = Network('net3')
    fork.set_input_layer(InputLayer(1, 1))
    fork.add('0', FCLayer(1, 1), prevs=fork.INPUT_LAYER_KEY)
    fork.add('1', FCLayer(1, 1), prevs=fork.INPUT_LAYER_KEY)
    fork.add('2', FCLayer(2, 1), prevs=('0', '1'))
    fork.add('2p', PoolingLayer(1, 1, 1))
    fork.add('3', FCLayer(2, 1), prevs=('0', '1'))
    fork.add('4', FCLayer(2, 1), prevs=('2p', '3'))
    fork.add('5', FCLayer(1, 1))
    fork.add('5p', PoolingLayer(1, 1, 1))
    fork.add('6', FCLayer(1, 1), prevs='5p')
    fork.add('7', FCLayer(1, 1))
    fork.add('8', FCLayer(1, 1))
    fork.add('9', FCLayer(1, 1), prevs='5p')
    fork.add('10', FCLayer(1, 1))
    fork.add('11', FCLayer(1, 1))
    fork.add('12', FCLayer(2, 1), prevs=('8', '11'))
    register(fork)

    # Complex fork.
    #          /5       \
    # 0-1-2-3-4-6-7-8-10-14
    #              \9/
    #          \11-12   /
    #          \13      /
    complex_fork = Network('net4')
    complex_fork.set_input_layer(InputLayer(1, 1))
    # Leading chain; each layer follows the one added just before it.
    for layer_name in ('0', '1', '2', '3', '4'):
        complex_fork.add(layer_name, FCLayer(1, 1))
    complex_fork.add('5', FCLayer(1, 1), prevs='4')
    complex_fork.add('6', FCLayer(1, 1), prevs='4')
    complex_fork.add('7', FCLayer(1, 1))
    complex_fork.add('8', FCLayer(1, 1), prevs='7')
    complex_fork.add('9', FCLayer(1, 1), prevs='7')
    complex_fork.add('10', FCLayer(1, 1))
    complex_fork.add('10p', PoolingLayer(2, 1, 1), prevs=('8', '10'))
    complex_fork.add('11', PoolingLayer(1, 1, 1), prevs='4')
    complex_fork.add('12', FCLayer(1, 1))
    complex_fork.add('13', PoolingLayer(1, 1, 1), prevs='4')
    complex_fork.add('14', FCLayer(5, 1), prevs=('5', '10p', '12', '13'))
    register(complex_fork)

    # Corner cases.
    #  ----\
    # //1-2\ 7-8\
    # 0-3-4-x   10-11-12
    #  \ \5/ 9 /  \__/
    #   6--/
    corner = Network('net5')
    corner.set_input_layer(InputLayer(1, 1))
    corner.add('0', FCLayer(1, 1))
    corner.add('1', FCLayer(1, 1), prevs='0')
    corner.add('2', FCLayer(1, 1))
    corner.add('3', FCLayer(1, 1), prevs='0')
    corner.add('4', FCLayer(1, 1), prevs='3')
    corner.add('5', FCLayer(1, 1), prevs='3')
    corner.add('6', FCLayer(1, 1), prevs='0')
    corner.add('7', FCLayer(5, 1), prevs=('0', '2', '4', '5', '6'))
    corner.add('8', FCLayer(1, 1))
    corner.add('9', FCLayer(5, 1), prevs=('0', '2', '4', '5', '6'))
    corner.add('10', FCLayer(2, 1), prevs=('8', '9'))
    corner.add('11', FCLayer(1, 1))
    corner.add('12', FCLayer(2, 1), prevs=('10', '11'))
    register(corner)

    # Fmap sizes.
    fmap_sizes = Network('net6')
    fmap_sizes.set_input_layer(InputLayer(1, 24))
    fmap_sizes.add('0', ConvLayer(1, 1, 24, 3))
    fmap_sizes.add('1', ConvLayer(1, 1, 12, 3, strd=2))
    fmap_sizes.add('1p', PoolingLayer(1, 6, 2))
    fmap_sizes.add('2', ConvLayer(1, 1, 6, 3))
    fmap_sizes.add('3', ConvLayer(1, 1, 6, 3, strd=4), prevs='0')
    register(fmap_sizes)

    # Topological order: see a visited vertex again.
    #  /---
    # 0-1-\\
    #  \2--2p
    revisit = Network('net7')
    revisit.set_input_layer(InputLayer(1, 1))
    revisit.add('0', FCLayer(1, 1))
    revisit.add('1', FCLayer(1, 1), prevs='0')
    revisit.add('2', FCLayer(1, 1), prevs='0')
    revisit.add('2p', PoolingLayer(3, 1, 1), prevs=('0', '1', '2'))
    register(revisit)

    # Forward to the middle.
    #    /-\
    # 0-1-2-2p-4-4p
    #  \-3------/
    forward = Network('net8')
    forward.set_input_layer(InputLayer(1, 1))
    forward.add('0', FCLayer(1, 1))
    forward.add('1', FCLayer(1, 1), prevs='0')
    forward.add('2', FCLayer(1, 1), prevs='1')
    forward.add('2p', PoolingLayer(2, 1, 1), prevs=('1', '2'))
    forward.add('3', FCLayer(1, 1), prevs='0')
    forward.add('4', FCLayer(2, 1), prevs='2p')
    forward.add('4p', PoolingLayer(2, 1, 1), prevs=('3', '4'))
    register(forward)

    # Previous layers include input and others.
    with_input = Network('net9')
    with_input.set_input_layer(InputLayer(1, 1))
    with_input.add('0', FCLayer(1, 1))
    with_input.add('1', FCLayer(2, 1),
                   prevs=(with_input.INPUT_LAYER_KEY, '0'))
    register(with_input)

    # Real networks.
    for real_name in all_networks():
        self.net[real_name] = import_network(real_name)

    self.batch_size = 16

    # 8x8 processing and DRAM regions; source data occupies the 8x4 region
    # at origin (0, 0) and destination data the 8x4 region at origin (0, 4).
    region_proc = NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(8, 8),
                             type=NodeRegion.PROC)
    region_dram = NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(8, 8),
                             type=NodeRegion.DRAM)
    region_src = NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(8, 4),
                            type=NodeRegion.DRAM)
    region_dst = NodeRegion(origin=PhyDim2(0, 4), dim=PhyDim2(8, 4),
                            type=NodeRegion.DRAM)
    self.resource = Resource(proc_region=region_proc,
                             dram_region=region_dram,
                             src_data_region=region_src,
                             dst_data_region=region_dst,
                             dim_array=PhyDim2(16, 16),
                             size_gbuf=65536,
                             size_regf=64,
                             array_bus_width=float('inf'),
                             dram_bandwidth=float('inf'),
                             no_time_mux=False)

    # Ofmap layout: one fmap range on one 1x1 DRAM region, with a partition
    # scheme whose dimensions are all (1, 1).
    unit_part = PartitionScheme(order=range(pe.NUM),
                                pdims=[(1, 1)] * pe.NUM)
    layout_frng = FmapRange((0, 0, 0, 0), (2, 4, 16, 16))
    layout_region = NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                               type=NodeRegion.DRAM)
    self.ofmap_layout = DataLayout(frngs=(layout_frng, ),
                                   regions=(layout_region, ),
                                   parts=(unit_part, ))
def layer_stats(args):
    '''
    Print stats of layers in the network.

    Writes to stdout a header, one comma-separated row per layer, MAX and
    SUM rows, and the numbers of CONV and FC layers.
    '''

    def emit(template, *fields):
        ''' Format one row and write it to stdout. '''
        sys.stdout.write(template.format(*fields))

    network = import_network(args.net)
    # Bytes per word, rounding the bit width up.
    word_bytes = (args.word + 7) // 8
    batch = args.batch

    header_fmt = ','.join([STR_FMT_NAME] + [STR_FMT_NUMB_HDER] * 5) + '\n'
    row_fmt = ','.join([STR_FMT_NAME] + [STR_FMT_NUMB] * 5) + '\n'
    separator = '-' * int(STR_FMT_NAME_LEN) + '\n'

    # Header.
    emit(header_fmt, 'Layer', 'Ifmap/kB', 'Ofmap/kB', 'Weight/kB', 'MACs/M',
         'MinOptBuf/kB')

    # Aggregate stats.
    max_fmaps = max_filters = max_ops = 0
    sum_fmaps = sum_filters = sum_ops = 0
    convs = fcs = 0

    for name in network:
        layer = network[name]

        # FC is tested before CONV, so a layer matching both counts as FC.
        if isinstance(layer, FCLayer):
            fcs += 1
        elif isinstance(layer, ConvLayer):
            convs += 1

        ifmap_size = layer.total_ifmap_size(word_bytes) * batch / KILO
        ofmap_size = layer.total_ofmap_size(word_bytes) * batch / KILO
        try:
            filter_size = layer.total_filter_size(word_bytes) / KILO
        except AttributeError:
            # Fall back to zero weight size when this raises AttributeError.
            filter_size = 0
        ops = layer.total_ops(batch) / MILLION

        # The minimum optimal buffer size is the sum of the full size (two
        # dimensions) for one data category, the size of one dimension for the
        # second, and the size of one point for the third.
        nifm = layer.nifm
        nofm = layer.nofm
        candidates = (
            filter_size + (ifmap_size + ofmap_size / nofm) / batch,
            filter_size + (ifmap_size / nifm + ofmap_size) / batch,
            ifmap_size + (ofmap_size + filter_size / nifm) / nofm,
            ifmap_size + (ofmap_size / batch + filter_size) / nofm,
            ofmap_size + (ifmap_size + filter_size / nofm) / nifm,
            ofmap_size + (ifmap_size / batch + filter_size) / nifm,
        )
        min_opt_buf_size = min(candidates)

        emit(row_fmt, name, ifmap_size, ofmap_size, filter_size, ops,
             min_opt_buf_size)

        # The fmap aggregates follow the ofmap size only.
        max_fmaps = max(max_fmaps, ofmap_size)
        max_filters = max(max_filters, filter_size)
        max_ops = max(max_ops, ops)
        sum_fmaps += ofmap_size
        sum_filters += filter_size
        sum_ops += ops

    # Columns without an aggregate are filled with NaN.
    blank = float('nan')

    sys.stdout.write(separator)
    emit(row_fmt, 'MAX', blank, max_fmaps, max_filters, max_ops, blank)
    emit(row_fmt, 'SUM', blank, sum_fmaps, sum_filters, sum_ops, blank)
    sys.stdout.write(separator)

    emit('# CONV layers = {}, # FC layers = {}\n', convs, fcs)
def test_import_network_invalid(self):
    '''
    Get import_network invalid.

    Importing the unknown network name 'aaa' must raise an ImportError whose
    message matches 'nns: .*defined.*'.
    '''
    # `assertRaisesRegexp` is a deprecated alias that Python 3.12 removed,
    # while Python 2.7 only provides that old spelling. Prefer the current
    # name and fall back to the old one.
    assert_raises_regex = getattr(self, 'assertRaisesRegex', None)
    if assert_raises_regex is None:
        assert_raises_regex = self.assertRaisesRegexp

    with assert_raises_regex(ImportError, 'nns: .*defined.*'):
        _ = nns.import_network('aaa')
def test_import_network(self):
    '''
    Get import_network.

    Every name returned by nns.all_networks() must import as a Network.
    '''
    for net_name in nns.all_networks():
        self.assertIsInstance(nns.import_network(net_name), Network)