Example 1
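A unit test that loads a pruned DoReFa-Net prototxt through the Caffe frontend, maximises fps with the performance model on a KU115 device, and then checks layer by layer that the chosen SIMD, PE and MMV folding factors do not exceed the layer's input size, output size and output dimension (unpacking tuple-valued sizes where necessary).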
    def test_cycles_per_layer(self):
        l = CaffeLoader(
            None,
            "./FINN/inputs/dorefanet-pruned-without-extra-messages.prototxt")
        net = nn.NN(l)
        dev = device.Device('XLNX:KU115.json')
        perfmodel = pm.PerfModel(net, dev)
        fps = perfmodel.maximise_fps()
        for idx, layer in enumerate(net.layers):
            in_chans = net.layers[idx].getInputSize()
            out_chans = net.layers[idx].getOutputSize()
            out_dim = net.layers[idx].get_out_dim()

            if isinstance(in_chans, tuple):
                print in_chans
                in_chans = in_chans[0]
            if isinstance(out_chans, tuple):
                print out_chans
                out_chans = out_chans[0]
            if isinstance(out_dim, tuple):
                print out_dim
                out_dim = out_dim[0]

            print perfmodel.SIMD[idx], in_chans
            print perfmodel.PE[idx], out_chans
            print perfmodel.MMV[idx], out_dim
            self.assertLessEqual(perfmodel.SIMD[idx], in_chans)
            self.assertLessEqual(perfmodel.PE[idx], out_chans)
            self.assertLessEqual(perfmodel.MMV[idx], out_dim)
Example 2
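The same folding-factor constraint check as in Example 1, in compact form, applied to the SFC network on a KU115 device.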
    def test_simd_pe_mmv_constraints(self):
        l = CaffeLoader(None, "./FINN/inputs/sfc.prototxt")
        net = nn.NN(l)
        dev = device.Device('XLNX:KU115.json')
        perfmodel = pm.PerfModel(net, dev)
        fps = perfmodel.maximise_fps()
        for idx, layer in enumerate(net.layers):
            self.assertLessEqual(perfmodel.SIMD[idx], layer.getInputSize())
            self.assertLessEqual(perfmodel.PE[idx], layer.getOutputSize())
            self.assertLessEqual(perfmodel.MMV[idx], layer.get_out_dim())
Example 3
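Loads the SFC caffemodel/prototxt pair and checks that the LUT figure reported by network_utilisation() equals 2 * num_matrix_layers * dev.lut_cost_per_op() on a VU9P device.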
    def test_cycles_per_op(self):
        l = CaffeLoader("./FINN/inputs/sfc.caffemodel",
                        "./FINN/inputs/sfc.prototxt")
        net = nn.NN(l)
        dev = device.Device('XLNX:VU9P.json')
        perfmodel = pm.PerfModel(net, dev)
        ops = perfmodel.network_utilisation()
        num_matrix_layers = net.count_matrix_layers()
        self.assertEqual(ops['luts'],
                         2 * num_matrix_layers * dev.lut_cost_per_op())
Example 4
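A helper that deep-copies a pipeline of layers, runs maximise_fps() on the performance model, and writes the resulting SIMD and PE folding factors back onto each layer of the copy before returning it.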
def res_alloc_predetermined(pipeline, net, dev):
    ret_pipeline = copy.deepcopy(pipeline)
    print "PIPELINE: ", ret_pipeline
    net.layers = ret_pipeline
    perfmodel = pm.PerfModel(net, dev)
    fps = perfmodel.maximise_fps()
    for i in range(len(ret_pipeline)):
        ret_pipeline[i].simd = perfmodel.SIMD[i]
        print "SIMD:", ret_pipeline[i].simd
        ret_pipeline[i].pe = perfmodel.PE[i]
        print "PE:", ret_pipeline[i].pe
    return ret_pipeline
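For orientation, here is a minimal sketch of how the helper above might be invoked, reusing only calls that appear in the other examples on this page. The FINN.core.layers module path and the toy weight shapes are assumptions; the helper itself also needs copy and FINN.core.perf_model imported in its own module.

import copy                        # required by res_alloc_predetermined
import numpy as np
import FINN.core.perf_model as pm  # required by res_alloc_predetermined
import FINN.core.device as device
import FINN.core.nn as nn
import FINN.core.layers as layers  # assumed module path for the layer classes

# Toy two-layer pipeline; weight shapes are illustrative only (OutChans, InChans).
pipeline = [layers.FullyConnectedLayer(np.zeros((128, 64)), 1, 1, 1),  # wbits, ibits, obits
            layers.FullyConnectedLayer(np.zeros((10, 128)), 1, 1, 1)]

net = nn.NN(layers=pipeline)
dev = device.Device('XLNX:KU115.json')

# Returns a deep copy of the pipeline with .simd and .pe set from the perf model.
folded = res_alloc_predetermined(pipeline, net, dev)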
Example 5
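Builds the four-layer LFC fully connected network from zero-filled weight matrices, maximises fps on a VU9P device at 192.4 MHz, and prints the folding factors, hardware cost, topology, cycle counts and the achieved fps with LUT/BRAM utilisation.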
def demo_lfc():
    logging.basicConfig(
        filename='FINN.log',
        level=logging.INFO)  # INFO rather than WARNING, so that log output is written
    lfcnetwork = []
    W0 = np.zeros((1024, 832))  # OutChans, InChans
    W1 = np.zeros((1024, 1024))
    W2 = np.zeros((1024, 1024))
    W3 = np.zeros((64, 1024))

    lfcnetwork.append(layers.FullyConnectedLayer(W0, 1, 1,
                                                 1))  # wbits, ibits, obits
    lfcnetwork.append(layers.FullyConnectedLayer(W1, 1, 1, 1))
    lfcnetwork.append(layers.FullyConnectedLayer(W2, 1, 1, 1))
    lfcnetwork.append(layers.FullyConnectedLayer(W3, 1, 1, 1))

    net = FINN.core.nn.NN(layers=lfcnetwork)

    dev = device.Device('XLNX:VU9P.json', frequency=192.4)
    perf = perf_model.PerfModel(net, dev)

    fps = perf.maximise_fps()

    # perf.SIMD[0] = 64
    # perf.SIMD[1] = 64
    # perf.SIMD[2] = 64
    # perf.SIMD[3] = 64
    #
    # perf.PE[0] = 256
    # perf.PE[1] = 256
    # perf.PE[2] = 256
    # perf.PE[3] = 16

    fps = perf.fps()

    perf.nswg.calculate_neural_folding()
    perf.nswg.calculate_write_block_cycles()
    perf.nswg.calculate_read_block_cycles()
    perf.nswg.calculate_total_cycles()
    perf.nswg.calculate_input_multipliers()
    perf.print_folding_factors()
    perf.print_hardware_cost()
    perf.print_topology()
    perf.print_cycles()
    fps = perf.fps()

    print "Achieved fps of %f with %f%% LUT utilisation and %f%% BRAM utilisation at %f Mhz" % (
        fps, perf.network_utilisation()['luts'] / dev.luts * 100,
        perf.network_utilisation()['brams'] / dev.brams * 100, dev.frequency)
Example 6
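Loads the SFC prototxt through the Caffe frontend on a KU115 device, prints the folding factors and hardware cost, and then overrides every layer's SIMD (first to 5, then to 20) to show how the reported hardware cost changes with the folding.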
def demo_hwgq_import():
    l = CaffeLoader(None, "inputs/sfc.prototxt")
    net = FINN.core.nn.NN(l)
    dev = device.Device('XLNX:KU115.json')
    perf = perf_model.PerfModel(net, dev)

    perf.print_folding_factors()
    perf.print_hardware_cost()

    for idx, val in enumerate(perf.SIMD):
        perf.SIMD[idx] = 5
        #perf.PE[idx]  = 10
    perf.print_folding_factors()
    perf.print_hardware_cost()

    for idx, val in enumerate(perf.SIMD):
        perf.SIMD[idx] = 20
        #perf.PE[idx]  = 100
    perf.print_folding_factors()
    perf.print_hardware_cost()
Example 7
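Builds a nine-layer network of six convolutional and three fully connected layers, maximises fps on a VU9P device at 248.5 MHz, then overrides the SIMD, PE and MMV folding factors with fixed values (taken, per the comment, from a BNN spreadsheet) before printing the resulting cost, cycles and achieved fps.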
def demo_sfc():
    logging.basicConfig(
        filename='FINN.log',
        level=logging.INFO)  # INFO rather than WARNING, so that log output is written
    sfcnetwork = []

    W0 = np.zeros((64, 3, 3, 3))  # out, in, kernel, kernel
    W1 = np.zeros((64, 64, 3, 3))
    W2 = np.zeros((128, 64, 3, 3))
    W3 = np.zeros((128, 128, 3, 3))
    W4 = np.zeros((256, 128, 3, 3))
    W5 = np.zeros((256, 256, 3, 3))
    W6 = np.zeros((512, 256))
    W7 = np.zeros((512, 512))
    W8 = np.zeros((10, 512))

    sfcnetwork.append(layers.ConvolutionLayer(
        W0, 32, 0, 1, 1, 1, 1, 0))  # in_dim, pad, stride, wbits, ibits, obits
    sfcnetwork.append(layers.ConvolutionLayer(W1, 30, 0, 1, 1, 1, 1, 0))
    sfcnetwork.append(layers.ConvolutionLayer(W2, 14, 0, 1, 1, 1, 1, 0))
    sfcnetwork.append(layers.ConvolutionLayer(W3, 12, 0, 1, 1, 1, 1, 0))
    sfcnetwork.append(layers.ConvolutionLayer(W4, 5, 0, 1, 1, 1, 1, 0))
    sfcnetwork.append(layers.ConvolutionLayer(W5, 3, 0, 1, 1, 1, 1, 0))

    sfcnetwork.append(layers.FullyConnectedLayer(W6, 1, 1, 1))
    sfcnetwork.append(layers.FullyConnectedLayer(W7, 1, 1, 1))
    sfcnetwork.append(layers.FullyConnectedLayer(W8, 1, 1, 1))

    net = FINN.core.nn.NN(layers=sfcnetwork)

    dev = device.Device('XLNX:VU9P.json', frequency=248.5)  # Measured on AWS
    perf = perf_model.PerfModel(net, dev)

    fps = perf.maximise_fps()

    # From BNN spreadsheet, t3
    perf.SIMD[0] = 3
    perf.SIMD[1] = 64
    perf.SIMD[2] = 64
    perf.SIMD[3] = 64
    perf.SIMD[4] = 64
    perf.SIMD[5] = 64
    perf.SIMD[6] = 16
    perf.SIMD[7] = 16
    perf.SIMD[8] = 16

    perf.PE[0] = 64
    perf.PE[1] = 64
    perf.PE[2] = 64
    perf.PE[3] = 64
    perf.PE[4] = 64
    perf.PE[5] = 64
    perf.PE[6] = 16
    perf.PE[7] = 16
    perf.PE[8] = 4

    perf.MMV[0] = 1
    perf.MMV[1] = 1
    perf.MMV[2] = 1
    perf.MMV[3] = 1
    perf.MMV[4] = 1
    perf.MMV[5] = 1
    perf.MMV[6] = 1
    perf.MMV[7] = 1
    perf.MMV[8] = 1

    # FPS given the above folding factors
    fps = perf.fps()

    perf.nswg.calculate_neural_folding()
    perf.nswg.calculate_write_block_cycles()
    perf.nswg.calculate_read_block_cycles()
    perf.nswg.calculate_total_cycles()
    perf.nswg.calculate_input_multipliers()
    perf.print_folding_factors()
    perf.print_hardware_cost()
    perf.print_topology()
    perf.print_cycles()
    fps = perf.fps()

    print(perf.nswg)

    print "Achieved fps of %f with %f%% LUT utilisation and %f%% BRAM utilisation at %f Mhz" % (
        fps, perf.network_utilisation()['luts'] / dev.luts * 100,
        perf.network_utilisation()['brams'] / dev.brams * 100, dev.frequency)
Example 8
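Builds a DoReFa-Net-style network of five convolutional and three fully connected layers, some with parallel = 2, applies fixed SIMD/PE/MMV folding factors on a VU9P device at 101 MHz, and prints the resulting cost, cycles and achieved fps.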
def demo_dorefa():
    logging.basicConfig(
        filename='FINN.log',
        level=logging.INFO)  # INFO rather than WARNING, so that log output is written
    dorefanetwork = []

    W0 = np.zeros((68, 3, 12, 12))  # out, in, kernel, kernel
    W1 = np.zeros((90, 34, 5, 5))
    W2 = np.zeros((272, 180, 3, 3))
    W3 = np.zeros((192, 136, 3, 3))
    W4 = np.zeros((128, 192, 3, 3))
    W5 = np.zeros((4096, 9216))
    W6 = np.zeros((4096, 4096))
    W7 = np.zeros((1000, 4096))

    dorefanetwork.append(layers.ConvolutionLayer(
        W0, 227, 0, 4, 1, 1, 1, 0))  # in_dim, pad, stride, wbits, ibits, obits
    dorefanetwork.append(layers.ConvolutionLayer(W1, 58, 0, 1, 1, 1, 1, 0))
    dorefanetwork[-1].parallel = 2
    dorefanetwork.append(layers.ConvolutionLayer(W2, 29, 0, 1, 1, 1, 1, 0))
    dorefanetwork.append(layers.ConvolutionLayer(W3, 16, 0, 1, 1, 1, 1, 0))
    dorefanetwork[-1].parallel = 2
    dorefanetwork.append(layers.ConvolutionLayer(W4, 16, 0, 1, 1, 1, 1, 0))
    dorefanetwork[-1].parallel = 2

    dorefanetwork.append(layers.FullyConnectedLayer(W5, 1, 1, 1))
    dorefanetwork.append(layers.FullyConnectedLayer(W6, 1, 1, 1))
    dorefanetwork.append(layers.FullyConnectedLayer(W7, 1, 1, 1))

    net = FINN.core.nn.NN(layers=dorefanetwork)

    dev = device.Device('XLNX:VU9P.json', frequency=101)  # Measured on AWS
    perf = perf_model.PerfModel(net, dev)

    # From BNN spreadsheet, t3
    perf.SIMD[0] = 3
    perf.SIMD[1] = 34
    perf.SIMD[2] = 45
    perf.SIMD[3] = 34
    perf.SIMD[4] = 64
    perf.SIMD[5] = 64
    perf.SIMD[6] = 64
    perf.SIMD[7] = 8

    perf.PE[0] = 68
    perf.PE[1] = 90
    perf.PE[2] = 136
    perf.PE[3] = 64
    perf.PE[4] = 32
    perf.PE[5] = 32
    perf.PE[6] = 16
    perf.PE[7] = 32

    perf.MMV[0] = 18
    perf.MMV[1] = 3
    perf.MMV[2] = 3
    perf.MMV[3] = 1
    perf.MMV[4] = 1
    perf.MMV[5] = 1
    perf.MMV[6] = 1
    perf.MMV[7] = 1

    # FPS given the above folding factors
    fps = perf.fps()

    perf.nswg.calculate_neural_folding()
    perf.nswg.calculate_write_block_cycles()
    perf.nswg.calculate_read_block_cycles()
    perf.nswg.calculate_total_cycles()
    perf.nswg.calculate_input_multipliers()
    perf.print_folding_factors()
    perf.print_hardware_cost()
    perf.print_topology()
    perf.print_cycles()
    fps = perf.fps()

    print(perf.nswg)
    print "Achieved fps of %f with %f%% LUT utilisation and %f%% BRAM utilisation at %f Mhz" % (
        fps, perf.network_utilisation()['luts'] / dev.luts * 100,
        perf.network_utilisation()['brams'] / dev.brams * 100, dev.frequency)
Example 9
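A module-level script: it loads the lfc-w1a1 prototxt, maximises fps on a VU9P device at 200 MHz, and prints the LUT and BRAM utilisation as percentages of the device's resources.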
# [BSD-style license header truncated in this excerpt]

import FINN.core.perf_model as pm
import FINN.core.device as device
import FINN.core.nn as nn
from FINN.frontend.caffeloader import CaffeLoader

print "Hardware model"

frequency = 200
#dev = device.Device('XLNX:PYNQ-Z1.json', frequency)
dev = device.Device('XLNX:VU9P.json', frequency)
net = nn.NN(CaffeLoader(None, './FINN/inputs/lfc-w1a1.prototxt'))
perfmodel = pm.PerfModel(net, dev)
fps = perfmodel.maximise_fps()
print "Network Utilization"

print perfmodel.network_utilisation()['luts']/dev.luts*100
print perfmodel.network_utilisation()['brams']/dev.brams*100