Ejemplo n.º 1
0
def argparser():
    '''
    Build the command-line argument parser.

    The parser takes one positional argument (the network name) plus options
    for the workload, the hardware resources, the cost model, the scheduling
    optimizations, and the search itself.

    Returns:
        argparse.ArgumentParser: the configured parser; the caller is expected
        to invoke `parse_args()` on it.
    '''

    ap = argparse.ArgumentParser()

    # Network to schedule.
    ap.add_argument('net',
                    help='network name, should be a .py file under "nns". '
                    'Choices: {}.'.format(', '.join(all_networks())))

    # Workload parameters.
    ap.add_argument('--batch', type=int, required=True, help='batch size')
    ap.add_argument('--word', type=int, default=16, help='word size in bits')

    # Hardware dimensions; each takes two integers, H then W.
    ap.add_argument('--nodes',
                    type=int,
                    nargs=2,
                    required=True,
                    metavar=('H', 'W'),
                    help='Parallel node partitioning dimensions')
    ap.add_argument('--array',
                    type=int,
                    nargs=2,
                    required=True,
                    metavar=('H', 'W'),
                    help='PE array dimensions')

    # On-chip storage capacities.
    ap.add_argument('--regf',
                    type=int,
                    required=True,
                    help='register file size in bytes per PE')
    ap.add_argument('--gbuf',
                    type=int,
                    required=True,
                    help='global buffer size in bytes')

    # Bandwidth limits.
    ap.add_argument('--bus-width',
                    type=int,
                    default=0,
                    help='array bus width in bits. set 0 to ignore')
    # Use a real float default, consistent with the other infinite defaults
    # below, rather than relying on argparse converting the string 'inf'.
    ap.add_argument('--dram-bw',
                    type=float,
                    default=float('inf'),
                    help='total DRAM bandwidth in bytes per cycle.')

    # Cost model.
    ap.add_argument('--op-cost',
                    type=float,
                    default=1,
                    help='cost of arithmetic operation')
    ap.add_argument('--hier-cost',
                    type=float,
                    nargs=4,
                    default=[200, 6, 2, 1],
                    metavar=('DRAM_COST', 'GBUF_COST', 'ITCN_COST',
                             'REGF_COST'),
                    help='cost of access to memory hierarchy')
    ap.add_argument('--hop-cost',
                    type=float,
                    default=10,
                    help='cost of access through one NoC hop')
    ap.add_argument('--unit-idle-cost',
                    type=float,
                    default=0,
                    help='static cost over all nodes for unit execution time')

    ap.add_argument('--mem-type',
                    default='2D',
                    choices=['2D', '3D'],
                    help='memory type. "2D" has memory only on edge nodes; '
                    '"3D" has memory vertially on top of all nodes.')

    # Optimization switches.
    ap.add_argument('--disable-bypass',
                    nargs='*',
                    default=[],
                    choices=['i', 'o', 'f'],
                    help='whether disallowing gbuf bypass for i (input), o '
                    '(output), or f (filter)')
    ap.add_argument('--solve-loopblocking',
                    action='store_true',
                    help='Use analytical solver to choose loop blocking. '
                    'Otherwise use exhaustive search.')
    ap.add_argument('--enable-access-forwarding',
                    action='store_true',
                    help='Each node fetches a subset of data and forwards to '
                    'other nodes.')
    ap.add_argument('--enable-gbuf-sharing',
                    action='store_true',
                    help='Share gbuf capacity across nodes through NoC.')
    ap.add_argument('--enable-save-writeback',
                    action='store_true',
                    help='Allow to save the writeback to memory for the '
                    'intermediate data between layers if able to '
                    'store the entire data set in on-chip buffers.')
    ap.add_argument('--disable-interlayer-opt',
                    '--basic-interlayer-partition',
                    action='store_true',
                    help='Disable optimizations and only allow basic '
                    'inter-layer pipeline.')

    # Partitioning switches; the second option string of each is an alias.
    ap.add_argument(
        '--hybrid-partition',
        '--hybrid-partition2d',  # deprecated old name
        action='store_true',
        help='Use hybrid partition for layer for node mapping. '
        'Otherwise use naive method based on layer type.')
    ap.add_argument('--batch-partition',
                    action='store_true',
                    help='Allow partitioning batch, i.e., consider data '
                    'parallelism.')
    ap.add_argument('--ifmaps-partition',
                    '--ifmap-partition',
                    action='store_true',
                    help='Allow partitioning ifmap channel dimension, which '
                    'requires extra data synchronization.')
    ap.add_argument('--interlayer-partition',
                    '--inter-layer-partition',
                    action='store_true',
                    help='Allow partitioning resources across multiple layers '
                    'and process them simultaneously as an inter-layer '
                    'pipeline.')

    # Layer pipelining limits; unbounded by default.
    ap.add_argument('--layer-pipeline-time-overhead',
                    type=float,
                    default=float('inf'),
                    help='maximum allowed execution time overhead due to '
                    'layer pipelining.')
    ap.add_argument('--layer-pipeline-max-degree',
                    type=float,
                    default=float('inf'),
                    help='maximum allowed layer pipelining degree, i.e., '
                    'number of vertices in a pipeline segment.')

    # Search control.
    ap.add_argument('-g',
                    '--goal',
                    default='e',
                    choices=['e', 'd', 'ed', 'E', 'D', 'ED'],
                    help='Goal of optimization: E(nergy), D(elay), or ED.')
    ap.add_argument('-t',
                    '--top',
                    type=int,
                    default=1,
                    help='Number of top schedules to keep during search.')
    # Floor division keeps the default an int. argparse applies `type` only
    # to string defaults, so `/` would leave a float here on Python 3.
    ap.add_argument('-p',
                    '--processes',
                    type=int,
                    default=multiprocessing.cpu_count() // 2,
                    help='Number of parallel processes to use for search.')
    ap.add_argument('-v',
                    '--verbose',
                    action='store_true',
                    help='Show progress and details.')

    return ap
Ejemplo n.º 2
0
def argparser():
    '''
    Build the command-line argument parser.

    The parser takes one positional argument (the network name) plus options
    for the workload, the hardware resources, the cost model, the scheduling
    optimizations, and the search itself.

    Returns:
        argparse.ArgumentParser: the configured parser; the caller is expected
        to invoke `parse_args()` on it.
    '''

    ap = argparse.ArgumentParser()

    # Network to schedule.
    ap.add_argument('net',
                    help='network name, should be a .py file under "nns". '
                    'Choices: {}.'.format(', '.join(all_networks())))

    # Workload parameters.
    ap.add_argument('--batch', type=int, required=True, help='batch size')
    ap.add_argument('--word', type=int, default=16, help='word size in bits')

    # Hardware dimensions; each takes two integers, H then W.
    ap.add_argument('--nodes',
                    type=int,
                    nargs=2,
                    required=True,
                    metavar=('H', 'W'),
                    help='Parallel node partitioning dimensions')
    ap.add_argument('--array',
                    type=int,
                    nargs=2,
                    required=True,
                    metavar=('H', 'W'),
                    help='PE array dimensions')

    # On-chip storage capacities.
    ap.add_argument('--regf',
                    type=int,
                    required=True,
                    help='register file size in bytes per PE')
    ap.add_argument('--gbuf',
                    type=int,
                    required=True,
                    help='global buffer size in bytes')

    # Cost model.
    ap.add_argument('--op-cost',
                    type=float,
                    default=1,
                    help='cost of arithmetic operation')
    ap.add_argument('--hier-cost',
                    type=float,
                    nargs=4,
                    default=[200, 6, 2, 1],
                    metavar=('DRAM_COST', 'GBUF_COST', 'ITCN_COST',
                             'REGF_COST'),
                    help='cost of access to memory hierarchy')
    ap.add_argument('--hop-cost',
                    type=float,
                    default=10,
                    help='cost of access through one NoC hop')
    ap.add_argument('--unit-static-cost',
                    type=float,
                    default=0,
                    help='static cost for unit execution time')

    ap.add_argument('--mem-type',
                    default='2D',
                    choices=['2D', '3D'],
                    help='memory type. "2D" has memory only on edge nodes; '
                    '"3D" has memory vertially on top of all nodes.')

    # Optimization switches.
    ap.add_argument('--disable-bypass',
                    nargs='*',
                    default=[],
                    choices=['i', 'o', 'f'],
                    help='whether disallowing gbuf bypass for i (input), o '
                    '(output), or f (filter)')
    ap.add_argument('--solve-loopblocking',
                    action='store_true',
                    help='Use analytical solver to choose loop blocking. '
                    'Otherwise use exhaustive search.')

    # Partitioning switches; the second option string of each is an alias.
    ap.add_argument(
        '--hybrid-partition',
        '--hybrid-partition2d',  # deprecated old name
        action='store_true',
        help='Use hybrid partition for layer for node mapping. '
        'Otherwise use naive method based on layer type.')
    ap.add_argument('--batch-partition',
                    action='store_true',
                    help='Allow partitioning batch, i.e., consider data '
                    'parallelism.')
    ap.add_argument('--ifmaps-partition',
                    '--ifmap-partition',
                    action='store_true',
                    help='Allow partitioning ifmap channel dimension, which '
                    'requires extra data synchronization.')

    # Search control.
    ap.add_argument('-t',
                    '--top',
                    type=int,
                    default=1,
                    help='Number of top schedules to keep during search.')
    # Floor division keeps the default an int. argparse applies `type` only
    # to string defaults, so `/` would leave a float here on Python 3.
    ap.add_argument('-p',
                    '--processes',
                    type=int,
                    default=multiprocessing.cpu_count() // 2,
                    help='Number of parallel processes to use for search.')
    ap.add_argument('-v',
                    '--verbose',
                    action='store_true',
                    help='Show progress and details.')

    return ap
Ejemplo n.º 3
0
 def test_import_network(self):
     ''' Each name listed by all_networks imports as a Network instance. '''
     for net_name in nns.all_networks():
         self.assertIsInstance(nns.import_network(net_name), Network)
    def setUp(self):
        '''
        Build the fixtures used by the tests: a dict of networks keyed by
        name, a batch size, a resource, and an ofmap data layout.
        '''

        # Networks under test, keyed by network name.
        self.net = {}

        # NOTE(review): layers added without `prevs` presumably attach to the
        # most recently added layer, so the order of the `add` calls below
        # matters -- confirm against Network.add.

        net = Network('net1')
        # Linear.
        net.set_input_layer(InputLayer(10, 1))
        net.add('0', FCLayer(10, 20))
        net.add('1', FCLayer(20, 30))
        net.add('1p', PoolingLayer(30, 1, 1))
        net.add('2', FCLayer(30, 40))
        net.add('3', FCLayer(40, 50))
        self.net[net.net_name] = net

        net = Network('net2')
        # Long linear.
        net.set_input_layer(InputLayer(1, 1))
        # 16 FC layers named '0' through '15'.
        for idx in range(16):
            net.add(str(idx), FCLayer(1, 1))
        self.net[net.net_name] = net

        net = Network('net3')
        # Fork.
        # /0-2\   /6- 7- 8\
        #   x  4-5         12
        # \1-3/   \9-10-11/
        net.set_input_layer(InputLayer(1, 1))
        net.add('0', FCLayer(1, 1), prevs=net.INPUT_LAYER_KEY)
        net.add('1', FCLayer(1, 1), prevs=net.INPUT_LAYER_KEY)
        net.add('2', FCLayer(2, 1), prevs=('0', '1'))
        net.add('2p', PoolingLayer(1, 1, 1))
        net.add('3', FCLayer(2, 1), prevs=('0', '1'))
        net.add('4', FCLayer(2, 1), prevs=('2p', '3'))
        net.add('5', FCLayer(1, 1))
        net.add('5p', PoolingLayer(1, 1, 1))
        net.add('6', FCLayer(1, 1), prevs='5p')
        net.add('7', FCLayer(1, 1))
        net.add('8', FCLayer(1, 1))
        net.add('9', FCLayer(1, 1), prevs='5p')
        net.add('10', FCLayer(1, 1))
        net.add('11', FCLayer(1, 1))
        net.add('12', FCLayer(2, 1), prevs=('8', '11'))
        self.net[net.net_name] = net

        net = Network('net4')
        # Complex fork.
        #          /5       \
        # 0-1-2-3-4-6-7-8-10-14
        #              \9/
        #          \11-12   /
        #          \13      /
        net.set_input_layer(InputLayer(1, 1))
        net.add('0', FCLayer(1, 1))
        net.add('1', FCLayer(1, 1))
        net.add('2', FCLayer(1, 1))
        net.add('3', FCLayer(1, 1))
        net.add('4', FCLayer(1, 1))
        net.add('5', FCLayer(1, 1), prevs='4')
        net.add('6', FCLayer(1, 1), prevs='4')
        net.add('7', FCLayer(1, 1))
        net.add('8', FCLayer(1, 1), prevs='7')
        net.add('9', FCLayer(1, 1), prevs='7')
        net.add('10', FCLayer(1, 1))
        net.add('10p', PoolingLayer(2, 1, 1), prevs=('8', '10'))
        net.add('11', PoolingLayer(1, 1, 1), prevs='4')
        net.add('12', FCLayer(1, 1))
        net.add('13', PoolingLayer(1, 1, 1), prevs='4')
        net.add('14', FCLayer(5, 1), prevs=('5', '10p', '12', '13'))
        self.net[net.net_name] = net

        net = Network('net5')
        # Corner cases.
        #  ----\
        # //1-2\ 7-8\
        # 0-3-4-x   10-11-12
        #  \ \5/ 9 /  \__/
        #   6--/
        net.set_input_layer(InputLayer(1, 1))
        net.add('0', FCLayer(1, 1))
        net.add('1', FCLayer(1, 1), prevs='0')
        net.add('2', FCLayer(1, 1))
        net.add('3', FCLayer(1, 1), prevs='0')
        net.add('4', FCLayer(1, 1), prevs='3')
        net.add('5', FCLayer(1, 1), prevs='3')
        net.add('6', FCLayer(1, 1), prevs='0')
        net.add('7', FCLayer(5, 1), prevs=('0', '2', '4', '5', '6'))
        net.add('8', FCLayer(1, 1))
        net.add('9', FCLayer(5, 1), prevs=('0', '2', '4', '5', '6'))
        net.add('10', FCLayer(2, 1), prevs=('8', '9'))
        net.add('11', FCLayer(1, 1))
        net.add('12', FCLayer(2, 1), prevs=('10', '11'))
        self.net[net.net_name] = net

        net = Network('net6')
        # Fmap sizes.
        net.set_input_layer(InputLayer(1, 24))
        net.add('0', ConvLayer(1, 1, 24, 3))
        net.add('1', ConvLayer(1, 1, 12, 3, strd=2))
        net.add('1p', PoolingLayer(1, 6, 2))
        net.add('2', ConvLayer(1, 1, 6, 3))
        # NOTE: ('0') is the plain string '0', not a one-element tuple.
        net.add('3', ConvLayer(1, 1, 6, 3, strd=4), prevs=('0'))
        self.net[net.net_name] = net

        net = Network('net7')
        # Topological order: see a visited vertex again.
        #  /---
        # 0-1-\\
        #  \2--2p
        net.set_input_layer(InputLayer(1, 1))
        net.add('0', FCLayer(1, 1))
        net.add('1', FCLayer(1, 1), prevs='0')
        net.add('2', FCLayer(1, 1), prevs='0')
        net.add('2p', PoolingLayer(3, 1, 1), prevs=('0', '1', '2'))
        self.net[net.net_name] = net

        net = Network('net8')
        # Forward to the middle.
        #    /-\
        # 0-1-2-2p-4-4p
        #  \-3------/
        net.set_input_layer(InputLayer(1, 1))
        net.add('0', FCLayer(1, 1))
        net.add('1', FCLayer(1, 1), prevs='0')
        net.add('2', FCLayer(1, 1), prevs='1')
        net.add('2p', PoolingLayer(2, 1, 1), prevs=('1', '2'))
        net.add('3', FCLayer(1, 1), prevs='0')
        net.add('4', FCLayer(2, 1), prevs='2p')
        net.add('4p', PoolingLayer(2, 1, 1), prevs=('3', '4'))
        self.net[net.net_name] = net

        net = Network('net9')
        # Previous layers include input and others.
        net.set_input_layer(InputLayer(1, 1))
        net.add('0', FCLayer(1, 1))
        net.add('1', FCLayer(2, 1), prevs=(net.INPUT_LAYER_KEY, '0'))
        self.net[net.net_name] = net

        # Real networks.
        # Stored under the names returned by all_networks(); a name equal to
        # one of the synthetic names above would replace that entry.
        for net_name in all_networks():
            self.net[net_name] = import_network(net_name)

        self.batch_size = 16

        # Resource fixture: the PROC and DRAM regions both start at (0, 0)
        # with dim (8, 8); the source and destination data regions are two
        # (8, 4) DRAM-type regions at origins (0, 0) and (0, 4). Bus width and
        # DRAM bandwidth are set to infinity.
        self.resource = Resource(
            proc_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(8, 8),
                                   type=NodeRegion.PROC),
            dram_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(8, 8),
                                   type=NodeRegion.DRAM),
            src_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(8, 4),
                                       type=NodeRegion.DRAM),
            dst_data_region=NodeRegion(origin=PhyDim2(0, 4),
                                       dim=PhyDim2(8, 4),
                                       type=NodeRegion.DRAM),
            dim_array=PhyDim2(16, 16),
            size_gbuf=65536,
            size_regf=64,
            array_bus_width=float('inf'),
            dram_bandwidth=float('inf'),
            no_time_mux=False)

        # Partition scheme with a (1, 1) pdim for each of the pe.NUM entries
        # -- presumably meaning "no partitioning"; confirm against
        # PartitionScheme.
        part = PartitionScheme(order=range(pe.NUM), pdims=[(1, 1)] * pe.NUM)
        # Ofmap layout with a single fmap range, (0, 0, 0, 0) to
        # (2, 4, 16, 16), placed in one (1, 1) DRAM-type region at the origin.
        self.ofmap_layout = DataLayout(
            frngs=(FmapRange((0, 0, 0, 0), (2, 4, 16, 16)), ),
            regions=(NodeRegion(origin=PhyDim2(0, 0),
                                dim=PhyDim2(1, 1),
                                type=NodeRegion.DRAM), ),
            parts=(part, ))
Ejemplo n.º 5
0
 def test_all_networks(self):
     ''' all_networks lists the known networks, and more than five in total. '''
     for expected_name in ('alex_net', 'vgg_net'):
         self.assertIn(expected_name, nns.all_networks())
     network_count = len(nns.all_networks())
     self.assertGreater(network_count, 5)