コード例 #1
0
    def setUp(self):

        self.alex_net = import_network('alex_net')
        self.vgg_net = import_network('vgg_net')

        self.map_strategy = MapStrategyEyeriss

        self.resource = Resource(
            proc_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(1, 1),
                                   type=NodeRegion.PROC),
            data_regions=(NodeRegion(origin=PhyDim2(0, 0),
                                     dim=PhyDim2(1, 1),
                                     type=NodeRegion.DATA), ),
            dim_array=PhyDim2(16, 16),
            size_gbuf=128 * 1024 // 2,  # 128 kB
            size_regf=512 // 2,  # 512 B
        )

        self.cost = Cost(mac_op=1,
                         mem_hier=(200, 6, 2, 1),
                         noc_hop=0,
                         unit_static=0)

        self.options = Option()
コード例 #2
0
    def setUp(self):

        self.alex_net = import_network('alex_net')
        self.vgg_net = import_network('vgg_net')

        net = Network('simple')
        net.set_input_layer(InputLayer(4, 2))
        net.add('1', ConvLayer(4, 4, 2, 1))
        net.add('2', ConvLayer(4, 4, 2, 1))
        # Two more layers to avoid single-segment case.
        net.add('a1', ConvLayer(4, 1, 1, 1, strd=2))
        net.add('a2', ConvLayer(1, 1, 1, 1))
        self.simple_net = net

        net = Network('complex')
        net.set_input_layer(InputLayer(8, 8))
        net.add('1', ConvLayer(8, 8, 8, 1))
        net.add('2a', ConvLayer(8, 8, 8, 1), prevs=('1', ))
        net.add('3a', ConvLayer(8, 8, 8, 1))
        net.add('2b', ConvLayer(8, 8, 8, 1), prevs=('1', ))
        net.add('3b', ConvLayer(8, 8, 8, 1))
        net.add('4', ConvLayer(16, 8, 8, 1), prevs=('3a', '3b'))
        self.complex_net = net

        self.map_strategy = MapStrategyEyeriss

        self.resource = Resource(
            proc_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(1, 1),
                                   type=NodeRegion.PROC),
            dram_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(1, 1),
                                   type=NodeRegion.DRAM),
            src_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(1, 1),
                                       type=NodeRegion.DRAM),
            dst_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(1, 1),
                                       type=NodeRegion.DRAM),
            dim_array=PhyDim2(16, 16),
            size_gbuf=128 * 1024 // 2,  # 128 kB
            size_regf=512 // 2,  # 512 B
            array_bus_width=float('inf'),
            dram_bandwidth=float('inf'),
            no_time_mux=False,
        )

        self.cost = Cost(mac_op=1,
                         mem_hier=(200, 6, 2, 1),
                         noc_hop=0,
                         idl_unit=0)

        self.options = Option()
コード例 #3
0
ファイル: my_test.py プロジェクト: husnainmubarik/nn_dataflow
    def __init__(self):

        self.alex_net = import_network('alex_net')
        self.mock_net = import_network('mock_net')

        self.map_strategy = MapStrategyEyeriss

        value_mult = {}
        value_control = 1
        my_weights = {}
        self.cost = Cost(value_control=value_control,
                         value_mult=value_mult,
                         adder_cost=1,
                         mac_op=1,
                         mem_hier=(200, 6, 2, 1),
                         noc_hop=0,
                         idl_unit=0,
                         my_weights=my_weights,
                         mem_cycles=(200, 6, 2, 1))

        self.options = Option()
コード例 #4
0
    def __init__(self):

        self.alex_net = import_network('alex_net')

        self.map_strategy = MapStrategyEyeriss

        value_mult = {}
        value_control = {}
        my_weights = {}
        self.cost = Cost(value_control=value_control,
                         value_mult=value_mult,
                         mac_op=1,
                         mem_hier=(200, 6, 2, 1),
                         noc_hop=0,
                         idl_unit=0,
                         my_weights=my_weights)

        self.options = Option()

        print('mapping is  : {}'.format(self.map_strategy))
        print('cost is: {}'.format(self.cost))
        print('options are: {}'.format(self.options))
コード例 #5
0
def do_scheduling(args):
    '''
    Get optimal scheduling for given problem. Return a result schedule.
    '''

    ## Network.

    network = import_network(args.net)
    batch_size = args.batch

    ## Resource.

    dim_nodes = PhyDim2(*args.nodes)
    dim_array = PhyDim2(*args.array)

    # Sizes of gbuf and regf are in words.
    word = (args.word + 7) / 8
    size_gbuf = args.gbuf / word
    size_regf = args.regf / word

    array_bus_width = args.bus_width // args.word
    if not array_bus_width:
        array_bus_width = float('inf')
    dram_bandwidth = args.dram_bw / word

    proc_region = NodeRegion(dim=dim_nodes,
                             origin=PhyDim2(0, 0),
                             type=NodeRegion.PROC)

    if args.mem_type == '2D':
        # Memory nodes are on two sides.
        data_region = NodeRegion(dim=PhyDim2(2, 2),
                                 origin=PhyDim2(0, 0),
                                 dist=dim_nodes - PhyDim2(1, 1),
                                 type=NodeRegion.DRAM)
        assert data_region.rel2abs(PhyDim2(1, 1)) + PhyDim2(1, 1) \
                == proc_region.dim
    elif args.mem_type == '3D':
        # Memory nodes are on the top.
        data_region = NodeRegion(dim=dim_nodes,
                                 origin=PhyDim2(0, 0),
                                 type=NodeRegion.DRAM)

    resource = Resource(proc_region=proc_region,
                        dram_region=data_region,
                        src_data_region=data_region,
                        dst_data_region=data_region,
                        dim_array=dim_array,
                        size_gbuf=size_gbuf,
                        size_regf=size_regf,
                        array_bus_width=array_bus_width,
                        dram_bandwidth=dram_bandwidth,
                        no_time_mux=False)

    ## Cost.

    hier_cost = [0] * me.NUM
    hier_cost[me.DRAM] = args.hier_cost[0]
    hier_cost[me.GBUF] = args.hier_cost[1]
    hier_cost[me.ITCN] = args.hier_cost[2]
    hier_cost[me.REGF] = args.hier_cost[3]
    cost = Cost(mac_op=args.op_cost,
                mem_hier=tuple(hier_cost),
                noc_hop=args.hop_cost,
                idl_unit=args.unit_idle_cost)

    ## Options.

    bypass = [True] * de.NUM
    bypass[de.IFM] = 'i' not in args.disable_bypass
    bypass[de.OFM] = 'o' not in args.disable_bypass
    bypass[de.FIL] = 'f' not in args.disable_bypass
    options = Option(
        sw_gbuf_bypass=tuple(bypass),
        sw_solve_loopblocking=args.solve_loopblocking,
        hw_access_forwarding=args.enable_access_forwarding,
        hw_gbuf_sharing=args.enable_gbuf_sharing,
        hw_gbuf_save_writeback=args.enable_save_writeback,
        partition_hybrid=args.hybrid_partition,
        partition_batch=args.batch_partition,
        partition_ifmaps=args.ifmaps_partition,
        partition_interlayer=args.interlayer_partition,
        layer_pipeline_time_ovhd=args.layer_pipeline_time_overhead,
        layer_pipeline_max_degree=args.layer_pipeline_max_degree,
        layer_pipeline_opt=not args.disable_interlayer_opt,
        opt_goal=args.goal.lower(),
        ntops=args.top,
        nprocesses=args.processes,
        verbose=args.verbose)

    ## Search schedules.

    nnd = NNDataflow(network, batch_size, resource, cost, MapStrategyEyeriss)
    tbeg = time.time()
    tops, cache_stats = nnd.schedule_search(options)
    tend = time.time()
    telapsed = tend - tbeg

    if not tops:
        sys.stderr.write('No valid dataflow found.\n')
        return None

    top = tops[0]

    ## Write results.

    res_map = OrderedDict()

    res_map['version'] = get_version(with_local=True)

    res_map['net'] = args.net
    res_map['batch'] = args.batch

    res_map['resource'] = resource._asdict()
    res_map['cost'] = cost._asdict()
    res_map['options'] = options._asdict()

    res_map['cache_stats'] = cache_stats
    res_map['elapsed'] = telapsed

    stats = stats_dict(top, cost)
    for key, val in stats.items():
        res_map[key] = val

    return res_map
コード例 #6
0
def do_scheduling(args):
    '''
    Get optimal scheduling for given problem. Return a result schedule.
    '''

    ## Network.

    network = import_network(args.net)
    batch_size = args.batch

    ## Resource.

    dim_nodes = PhyDim2(*args.nodes)
    dim_array = PhyDim2(*args.array)

    # Sizes of gbuf and regf are in words.
    word = (args.word + 7) / 8
    size_gbuf = args.gbuf / word
    size_regf = args.regf / word

    proc_region = NodeRegion(dim=dim_nodes,
                             origin=PhyDim2(0, 0),
                             type=NodeRegion.PROC)

    if args.mem_type == '2D':
        # Memory nodes are on two sides.
        data_regions = (NodeRegion(dim=PhyDim2(h=dim_nodes.h, w=1),
                                   origin=PhyDim2(h=0, w=0),
                                   type=NodeRegion.DATA),
                        NodeRegion(dim=PhyDim2(h=dim_nodes.h, w=1),
                                   origin=PhyDim2(h=0, w=dim_nodes.w - 1),
                                   type=NodeRegion.DATA))
    elif args.mem_type == '3D':
        # All nodes have memory.
        data_regions = (NodeRegion(dim=dim_nodes,
                                   origin=PhyDim2(0, 0),
                                   type=NodeRegion.DATA), )

    resource = Resource(proc_region=proc_region,
                        data_regions=data_regions,
                        dim_array=dim_array,
                        size_gbuf=size_gbuf,
                        size_regf=size_regf)

    ## Cost.

    hier_cost = [0] * me.NUM
    hier_cost[me.DRAM] = args.hier_cost[0]
    hier_cost[me.GBUF] = args.hier_cost[1]
    hier_cost[me.ITCN] = args.hier_cost[2]
    hier_cost[me.REGF] = args.hier_cost[3]
    cost = Cost(mac_op=args.op_cost,
                mem_hier=tuple(hier_cost),
                noc_hop=args.hop_cost,
                unit_static=args.unit_static_cost)

    ## Options.

    bypass = [True] * de.NUM
    bypass[de.IFM] = 'i' not in args.disable_bypass
    bypass[de.OFM] = 'o' not in args.disable_bypass
    bypass[de.FIL] = 'f' not in args.disable_bypass
    options = Option(sw_gbuf_bypass=tuple(bypass),
                     sw_solve_loopblocking=args.solve_loopblocking,
                     partition_hybrid=args.hybrid_partition,
                     partition_batch=args.batch_partition,
                     partition_ifmaps=args.ifmaps_partition,
                     ntops=args.top,
                     nprocesses=args.processes,
                     verbose=args.verbose)

    ## Search schedules.

    nnd = NNDataflow(network, batch_size, resource, cost, MapStrategyEyeriss)
    tops, cache_stats = nnd.schedule_search(options)

    if not tops:
        sys.stderr.write('No valid dataflow found.\n')
        return None

    top = tops[0]

    ## Write results.

    res_map = OrderedDict()

    res_map['version'] = get_version(with_local=True)

    res_map['net'] = args.net
    res_map['batch'] = args.batch

    res_map['resource'] = resource._asdict()
    res_map['cost'] = cost._asdict()
    res_map['options'] = options._asdict()

    res_map['cache_stats'] = cache_stats

    stats = stats_dict(top, cost)
    for key, val in stats.items():
        res_map[key] = val

    return res_map
コード例 #7
0
    def setUp(self):

        self.net = {}

        net = Network('net1')
        # Linear.
        net.set_input_layer(InputLayer(10, 1))
        net.add('0', FCLayer(10, 20))
        net.add('1', FCLayer(20, 30))
        net.add('1p', PoolingLayer(30, 1, 1))
        net.add('2', FCLayer(30, 40))
        net.add('3', FCLayer(40, 50))
        self.net[net.net_name] = net

        net = Network('net2')
        # Long linear.
        net.set_input_layer(InputLayer(1, 1))
        for idx in range(16):
            net.add(str(idx), FCLayer(1, 1))
        self.net[net.net_name] = net

        net = Network('net3')
        # Fork.
        # /0-2\   /6- 7- 8\
        #   x  4-5         12
        # \1-3/   \9-10-11/
        net.set_input_layer(InputLayer(1, 1))
        net.add('0', FCLayer(1, 1), prevs=net.INPUT_LAYER_KEY)
        net.add('1', FCLayer(1, 1), prevs=net.INPUT_LAYER_KEY)
        net.add('2', FCLayer(2, 1), prevs=('0', '1'))
        net.add('2p', PoolingLayer(1, 1, 1))
        net.add('3', FCLayer(2, 1), prevs=('0', '1'))
        net.add('4', FCLayer(2, 1), prevs=('2p', '3'))
        net.add('5', FCLayer(1, 1))
        net.add('5p', PoolingLayer(1, 1, 1))
        net.add('6', FCLayer(1, 1), prevs='5p')
        net.add('7', FCLayer(1, 1))
        net.add('8', FCLayer(1, 1))
        net.add('9', FCLayer(1, 1), prevs='5p')
        net.add('10', FCLayer(1, 1))
        net.add('11', FCLayer(1, 1))
        net.add('12', FCLayer(2, 1), prevs=('8', '11'))
        self.net[net.net_name] = net

        net = Network('net4')
        # Complex fork.
        #          /5       \
        # 0-1-2-3-4-6-7-8-10-14
        #              \9/
        #          \11-12   /
        #          \13      /
        net.set_input_layer(InputLayer(1, 1))
        net.add('0', FCLayer(1, 1))
        net.add('1', FCLayer(1, 1))
        net.add('2', FCLayer(1, 1))
        net.add('3', FCLayer(1, 1))
        net.add('4', FCLayer(1, 1))
        net.add('5', FCLayer(1, 1), prevs='4')
        net.add('6', FCLayer(1, 1), prevs='4')
        net.add('7', FCLayer(1, 1))
        net.add('8', FCLayer(1, 1), prevs='7')
        net.add('9', FCLayer(1, 1), prevs='7')
        net.add('10', FCLayer(1, 1))
        net.add('10p', PoolingLayer(2, 1, 1), prevs=('8', '10'))
        net.add('11', PoolingLayer(1, 1, 1), prevs='4')
        net.add('12', FCLayer(1, 1))
        net.add('13', PoolingLayer(1, 1, 1), prevs='4')
        net.add('14', FCLayer(5, 1), prevs=('5', '10p', '12', '13'))
        self.net[net.net_name] = net

        net = Network('net5')
        # Corner cases.
        #  ----\
        # //1-2\ 7-8\
        # 0-3-4-x   10-11-12
        #  \ \5/ 9 /  \__/
        #   6--/
        net.set_input_layer(InputLayer(1, 1))
        net.add('0', FCLayer(1, 1))
        net.add('1', FCLayer(1, 1), prevs='0')
        net.add('2', FCLayer(1, 1))
        net.add('3', FCLayer(1, 1), prevs='0')
        net.add('4', FCLayer(1, 1), prevs='3')
        net.add('5', FCLayer(1, 1), prevs='3')
        net.add('6', FCLayer(1, 1), prevs='0')
        net.add('7', FCLayer(5, 1), prevs=('0', '2', '4', '5', '6'))
        net.add('8', FCLayer(1, 1))
        net.add('9', FCLayer(5, 1), prevs=('0', '2', '4', '5', '6'))
        net.add('10', FCLayer(2, 1), prevs=('8', '9'))
        net.add('11', FCLayer(1, 1))
        net.add('12', FCLayer(2, 1), prevs=('10', '11'))
        self.net[net.net_name] = net

        net = Network('net6')
        # Fmap sizes.
        net.set_input_layer(InputLayer(1, 24))
        net.add('0', ConvLayer(1, 1, 24, 3))
        net.add('1', ConvLayer(1, 1, 12, 3, strd=2))
        net.add('1p', PoolingLayer(1, 6, 2))
        net.add('2', ConvLayer(1, 1, 6, 3))
        net.add('3', ConvLayer(1, 1, 6, 3, strd=4), prevs=('0'))
        self.net[net.net_name] = net

        net = Network('net7')
        # Topological order: see a visited vertex again.
        #  /---
        # 0-1-\\
        #  \2--2p
        net.set_input_layer(InputLayer(1, 1))
        net.add('0', FCLayer(1, 1))
        net.add('1', FCLayer(1, 1), prevs='0')
        net.add('2', FCLayer(1, 1), prevs='0')
        net.add('2p', PoolingLayer(3, 1, 1), prevs=('0', '1', '2'))
        self.net[net.net_name] = net

        net = Network('net8')
        # Forward to the middle.
        #    /-\
        # 0-1-2-2p-4-4p
        #  \-3------/
        net.set_input_layer(InputLayer(1, 1))
        net.add('0', FCLayer(1, 1))
        net.add('1', FCLayer(1, 1), prevs='0')
        net.add('2', FCLayer(1, 1), prevs='1')
        net.add('2p', PoolingLayer(2, 1, 1), prevs=('1', '2'))
        net.add('3', FCLayer(1, 1), prevs='0')
        net.add('4', FCLayer(2, 1), prevs='2p')
        net.add('4p', PoolingLayer(2, 1, 1), prevs=('3', '4'))
        self.net[net.net_name] = net

        net = Network('net9')
        # Previous layers include input and others.
        net.set_input_layer(InputLayer(1, 1))
        net.add('0', FCLayer(1, 1))
        net.add('1', FCLayer(2, 1), prevs=(net.INPUT_LAYER_KEY, '0'))
        self.net[net.net_name] = net

        # Real networks.
        for net_name in all_networks():
            self.net[net_name] = import_network(net_name)

        self.batch_size = 16

        self.resource = Resource(
            proc_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(8, 8),
                                   type=NodeRegion.PROC),
            dram_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(8, 8),
                                   type=NodeRegion.DRAM),
            src_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(8, 4),
                                       type=NodeRegion.DRAM),
            dst_data_region=NodeRegion(origin=PhyDim2(0, 4),
                                       dim=PhyDim2(8, 4),
                                       type=NodeRegion.DRAM),
            dim_array=PhyDim2(16, 16),
            size_gbuf=65536,
            size_regf=64,
            array_bus_width=float('inf'),
            dram_bandwidth=float('inf'),
            no_time_mux=False)

        part = PartitionScheme(order=range(pe.NUM), pdims=[(1, 1)] * pe.NUM)
        self.ofmap_layout = DataLayout(
            frngs=(FmapRange((0, 0, 0, 0), (2, 4, 16, 16)), ),
            regions=(NodeRegion(origin=PhyDim2(0, 0),
                                dim=PhyDim2(1, 1),
                                type=NodeRegion.DRAM), ),
            parts=(part, ))
コード例 #8
0
ファイル: nn_layer_stats.py プロジェクト: wangzy/nn_dataflow
def layer_stats(args):
    ''' Print stats of layers in the network. '''

    network = import_network(args.net)
    word_bytes = (args.word + 7) // 8
    batch = args.batch

    hder_fmt = ','.join([STR_FMT_NAME] + [STR_FMT_NUMB_HDER] * 5) + '\n'
    line_fmt = ','.join([STR_FMT_NAME] + [STR_FMT_NUMB] * 5) + '\n'
    line_sep = '-' * int(STR_FMT_NAME_LEN) + '\n'

    # Header.
    sys.stdout.write(hder_fmt
                     .format('Layer',
                             'Ifmap/kB', 'Ofmap/kB', 'Weight/kB',
                             'MACs/M', 'MinOptBuf/kB'))

    # Aggregate stats.
    max_fmaps = 0
    max_filters = 0
    max_ops = 0
    sum_fmaps = 0
    sum_filters = 0
    sum_ops = 0
    convs = 0
    fcs = 0

    for name in network:

        layer = network[name]

        if isinstance(layer, FCLayer):
            fcs += 1
        elif isinstance(layer, ConvLayer):
            convs += 1

        ifmap_size = layer.total_ifmap_size(word_bytes) * batch / KILO
        ofmap_size = layer.total_ofmap_size(word_bytes) * batch / KILO
        try:
            filter_size = layer.total_filter_size(word_bytes) / KILO
        except AttributeError:
            filter_size = 0

        ops = layer.total_ops(batch) / MILLION

        # The minimum optimal buffer size is the sum of the full size (two
        # dimensions) for one data category, the size of one dimension for the
        # second, and the size of one point for the third.
        min_opt_buf_size = min(
            filter_size + (ifmap_size + ofmap_size / layer.nofm) / batch,
            filter_size + (ifmap_size / layer.nifm + ofmap_size) / batch,
            ifmap_size + (ofmap_size + filter_size / layer.nifm) / layer.nofm,
            ifmap_size + (ofmap_size / batch + filter_size) / layer.nofm,
            ofmap_size + (ifmap_size + filter_size / layer.nofm) / layer.nifm,
            ofmap_size + (ifmap_size / batch + filter_size) / layer.nifm)

        sys.stdout.write(line_fmt
                         .format(name,
                                 ifmap_size, ofmap_size, filter_size,
                                 ops, min_opt_buf_size))

        max_fmaps = max(max_fmaps, ofmap_size)
        max_filters = max(max_filters, filter_size)
        max_ops = max(max_ops, ops)
        sum_fmaps += ofmap_size
        sum_filters += filter_size
        sum_ops += ops

    sys.stdout.write(line_sep)

    sys.stdout.write(line_fmt
                     .format('MAX',
                             float('nan'), max_fmaps, max_filters,
                             max_ops, float('nan')))
    sys.stdout.write(line_fmt
                     .format('SUM',
                             float('nan'), sum_fmaps, sum_filters,
                             sum_ops, float('nan')))

    sys.stdout.write(line_sep)

    sys.stdout.write('# CONV layers = {}, # FC layers = {}\n'
                     .format(convs, fcs))
コード例 #9
0
ファイル: test_nns.py プロジェクト: wangzy/nn_dataflow
 def test_import_network_invalid(self):
     ''' Get import_network invalid. '''
     with self.assertRaisesRegexp(ImportError, 'nns: .*defined.*'):
         _ = nns.import_network('aaa')
コード例 #10
0
ファイル: test_nns.py プロジェクト: wangzy/nn_dataflow
 def test_import_network(self):
     ''' Get import_network. '''
     for name in nns.all_networks():
         network = nns.import_network(name)
         self.assertIsInstance(network, Network)