def main():
    # config
    binary_mode = True
    epoch = 4
    mini_batch = 32
    training_modulation_size = 3
    inference_modulation_size = 3

    # load MNIST data
    td = bb.LoadMnist.load()

    batch_size = len(td.x_train)  # total number of training samples, not the mini-batch size
    print('batch_size =', batch_size)

    ############################
    # Learning
    ############################

    # create network
    main_net = bb.Sequential.create()
    main_net.add(bb.DenseAffine.create(output_shape=[1024]))
    main_net.add(bb.BatchNormalization.create())
    main_net.add(bb.ReLU.create())
    main_net.add(bb.DenseAffine.create([512]))
    main_net.add(bb.BatchNormalization.create())
    main_net.add(bb.ReLU.create())
    main_net.add(bb.DenseAffine.create(td.t_shape))
    if binary_mode:
        main_net.add(bb.BatchNormalization.create())
        main_net.add(bb.ReLU.create())
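        # (in binary mode this trailing BatchNorm+ReLU pair acts as the
        # output binarizer: BinaryBrain's ReLU degenerates to a hard
        # threshold once the "binary true" command is active)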

    # wrapping with binary modulator: BinaryModulation oversamples every
    # input frame into `training_modulation_size` binary frames, and the
    # Reduce layer folds the widened output back to the target shape
    net = bb.Sequential.create()
    net.add(
        bb.BinaryModulation.create(
            main_net, training_modulation_size=training_modulation_size))
    net.add(bb.Reduce.create(td.t_shape))
    net.set_input_shape(td.x_shape)

    # print model information
    print(net.get_info())

    # set binary mode
    if binary_mode:
        net.send_command("binary true")
    else:
        net.send_command("binary false")
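    # (send_command broadcasts the command string to every layer in the
    # container, so a single call reconfigures the whole model)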

    # learning
    print('\n[learning]')
    loss = bb.LossSoftmaxCrossEntropy.create()
    metrics = bb.MetricsCategoricalAccuracy.create()
    optimizer = bb.OptimizerAdam.create()
    optimizer.set_variables(net.get_parameters(), net.get_gradients())

    runner = bb.Runner(net, "mnist-sparse-lut6-simple", loss, metrics,
                       optimizer)
    runner.fitting(td, epoch_size=epoch, mini_batch_size=mini_batch)
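
Most of these listings define a main() function but omit the module-level boilerplate. A minimal sketch of what each script assumes, with the import spelled the way the BinaryBrain samples usually write it (an assumption for these excerpts):

import binarybrain as bb

if __name__ == '__main__':
    main()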
# (excerpt: the earlier layers, the cnv2_sub/cnv3_sub sub-networks and the
# config variables are defined in a part of the original script not shown)
net.add(bb.LoweringConvolution.create(cnv2_sub, 3, 3))
net.add(bb.LoweringConvolution.create(cnv3_sub, 3, 3))
net.add(bb.MaxPooling.create(2, 2))
net.add(bb.DenseAffine.create([512]))
net.add(bb.BatchNormalization.create())
net.add(bb.ReLU.create())
net.add(bb.DenseAffine.create([10]))

net.set_input_shape(td['x_shape'])

# disable binary mode (this example trains the dense CNN in FP32)
net.send_command("binary false")

# print model information
print(net.get_info())

# learning
print('\n[learning]')

loss = bb.LossSoftmaxCrossEntropy.create()
metrics = bb.MetricsCategoricalAccuracy.create()
optimizer = bb.OptimizerAdam.create()
optimizer.set_variables(net.get_parameters(), net.get_gradients())

runner = bb.Runner(net, "cifar10-fp32-dense-cnn", loss, metrics, optimizer)
runner.fitting(td,
               epoch_size=epoch,
               mini_batch_size=mini_batch,
               file_read=file_read,
               file_write=file_write)

Example 3

# (excerpt: main_net and the config variables are defined in a part of the
# original script not shown)
net.add(bb.BinaryModulation.create(main_net, training_modulation_size=15))
net.add(bb.Reduce.create(td.t_shape))
net.set_input_shape(td.x_shape)

print(net.get_info())

loss = bb.LossSoftmaxCrossEntropy.create()
metrics = bb.MetricsCategoricalAccuracy.create()
optimizer = bb.OptimizerAdam.create()

optimizer.set_variables(net.get_parameters(), net.get_gradients())

batch_size = len(td.x_train)
print('batch_size =', batch_size)

runner = bb.Runner(net, "mnist-mlp-sparse-lut6", loss, metrics, optimizer)
runner.fitting(td, epoch_size=3, mini_batch_size=16)

Example 4

def main():
    # config
    epoch                     = 4
    mini_batch                = 32
    training_modulation_size  = 3
    inference_modulation_size = 3
    
    # load data
    td = bb.load_cifar10()

    batch_size = len(td['x_train'])
    print('batch_size =', batch_size)
    
    
    
    ############################
    # Learning
    ############################
    
    # create layer
    layer_sl0 = bb.SparseLut6.create([1024])
    layer_sl1 = bb.SparseLut6.create([480])
    layer_sl2 = bb.SparseLut6.create([70])
    
    # create network
    main_net = bb.Sequential.create()
    main_net.add(layer_sl0)
    main_net.add(layer_sl1)
    main_net.add(layer_sl2)
    
    # wrapping with binary modulator
    net = bb.Sequential.create()
    net.add(bb.BinaryModulation.create(main_net, training_modulation_size=training_modulation_size))
    net.add(bb.Reduce.create(td['t_shape']))
    net.set_input_shape(td['x_shape'])
    
    # set binary mode
    net.send_command('binary true')
    
    # print model information
    print(net.get_info())
    
    # learning
    print('\n[learning]')
    loss      = bb.LossSoftmaxCrossEntropy.create()
    metrics   = bb.MetricsCategoricalAccuracy.create()
    optimizer = bb.OptimizerAdam.create()
    optimizer.set_variables(net.get_parameters(), net.get_gradients())
    
    runner = bb.Runner(net, "cifar10-sparse-lut6-simple", loss, metrics, optimizer)
    runner.fitting(td, epoch_size=epoch, mini_batch_size=mini_batch, file_read=True, file_write=True)
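    # (file_read/file_write make the Runner reload and checkpoint the
    # network state between sessions, so interrupted training can resume)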
    
    
    ################################
    # convert to FPGA
    ################################
    
    print('\n[convert to Binary LUT]')
    
    # LUT-network
    layer_bl0 = bb.BinaryLut6.create(layer_sl0.get_output_shape())
    layer_bl1 = bb.BinaryLut6.create(layer_sl1.get_output_shape())
    layer_bl2 = bb.BinaryLut6.create(layer_sl2.get_output_shape())
    
    lut_net = bb.Sequential.create()
    lut_net.add(layer_bl0)
    lut_net.add(layer_bl1)
    lut_net.add(layer_bl2)
    
    # evaluate network
    eval_net = bb.Sequential.create()
    eval_net.add(bb.BinaryModulation.create(lut_net, inference_modulation_size=inference_modulation_size))
    eval_net.add(bb.Reduce.create(td['t_shape']))
    
    # set input shape
    eval_net.set_input_shape(td['x_shape'])
    
    # parameter copy
    print('parameter copy to binary LUT-Network')
    layer_bl0.import_parameter(layer_sl0)
    layer_bl1.import_parameter(layer_sl1)
    layer_bl2.import_parameter(layer_sl2)
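    # (import_parameter() distills each trained SparseLut6 into the fixed
    # 6-input truth table of the matching BinaryLut6 layer)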
    
    # evaluate network
    print('evaluate LUT-Network')
    lut_runner = bb.Runner(eval_net, "cifar10-binary-lut6-simple",
                    bb.LossSoftmaxCrossEntropy.create(),
                    bb.MetricsCategoricalAccuracy.create())
    lut_runner.evaluation(td, mini_batch_size=mini_batch)
    
    # write Verilog
    print('write verilog file')
    with open('Cifar10LutSimple.v', 'w') as f:
        f.write('`timescale 1ns / 1ps\n\n')
        f.write(bb.make_verilog_from_lut('Cifar10LutSimple', [layer_bl0, layer_bl1, layer_bl2]))

Example 5

def main():
    epoch = 1
    mini_batch = 32
    training_modulation_size = 1
    inference_modulation_size = 1

    # load MNIST data
    td = bb.LoadMnist.load()

    batch_size = len(td.x_train)
    print('batch_size =', batch_size)

    ############################
    # Learning
    ############################

    # create layer
    layer_cnv0_sl0 = bb.SparseLut6.create([192])
    layer_cnv0_sl1 = bb.SparseLut6.create([32])
    layer_cnv1_sl0 = bb.SparseLut6.create([192])
    layer_cnv1_sl1 = bb.SparseLut6.create([32])
    layer_cnv2_sl0 = bb.SparseLut6.create([384])
    layer_cnv2_sl1 = bb.SparseLut6.create([64])
    layer_cnv3_sl0 = bb.SparseLut6.create([384])
    layer_cnv3_sl1 = bb.SparseLut6.create([64])
    layer_sl4 = bb.SparseLut6.create([420])
    layer_sl5 = bb.SparseLut6.create([70])

    # main network
    cnv0_sub = bb.Sequential.create()
    cnv0_sub.add(layer_cnv0_sl0)
    cnv0_sub.add(layer_cnv0_sl1)

    cnv1_sub = bb.Sequential.create()
    cnv1_sub.add(layer_cnv1_sl0)
    cnv1_sub.add(layer_cnv1_sl1)

    cnv2_sub = bb.Sequential.create()
    cnv2_sub.add(layer_cnv2_sl0)
    cnv2_sub.add(layer_cnv2_sl1)

    cnv3_sub = bb.Sequential.create()
    cnv3_sub.add(layer_cnv3_sl0)
    cnv3_sub.add(layer_cnv3_sl1)

    main_net = bb.Sequential.create()
    main_net.add(bb.LoweringConvolution.create(cnv0_sub, 3, 3))
    main_net.add(bb.LoweringConvolution.create(cnv1_sub, 3, 3))
    main_net.add(bb.MaxPooling.create(2, 2))
    main_net.add(bb.LoweringConvolution.create(cnv2_sub, 3, 3))
    main_net.add(bb.LoweringConvolution.create(cnv3_sub, 3, 3))
    main_net.add(bb.MaxPooling.create(2, 2))
    main_net.add(layer_sl4)
    main_net.add(layer_sl5)

    # wrapping with binary modulator
    net = bb.Sequential.create()
    net.add(
        bb.BinaryModulation.create(
            main_net, training_modulation_size=training_modulation_size))
    net.add(bb.Reduce.create(td.t_shape))
    net.set_input_shape(td.x_shape)

    # print model information
    print(net.get_info())

    # learning
    print('\n[learning]')

    loss = bb.LossSoftmaxCrossEntropy.create()
    metrics = bb.MetricsCategoricalAccuracy.create()
    optimizer = bb.OptimizerAdam.create()
    optimizer.set_variables(net.get_parameters(), net.get_gradients())

    runner = bb.Runner(net, "mnist-sparse-lut6-cnn", loss, metrics, optimizer)
    runner.fitting(td, epoch_size=epoch, mini_batch_size=mini_batch)

    ################################
    # convert to FPGA
    ################################

    print('\n[convert to Binary LUT]')

    # LUT-network
    layer_cnv0_bl0 = bb.BinaryLut6.create(layer_cnv0_sl0.get_output_shape())
    layer_cnv0_bl1 = bb.BinaryLut6.create(layer_cnv0_sl1.get_output_shape())
    layer_cnv1_bl0 = bb.BinaryLut6.create(layer_cnv1_sl0.get_output_shape())
    layer_cnv1_bl1 = bb.BinaryLut6.create(layer_cnv1_sl1.get_output_shape())
    layer_cnv2_bl0 = bb.BinaryLut6.create(layer_cnv2_sl0.get_output_shape())
    layer_cnv2_bl1 = bb.BinaryLut6.create(layer_cnv2_sl1.get_output_shape())
    layer_cnv3_bl0 = bb.BinaryLut6.create(layer_cnv3_sl0.get_output_shape())
    layer_cnv3_bl1 = bb.BinaryLut6.create(layer_cnv3_sl1.get_output_shape())
    layer_bl4 = bb.BinaryLut6.create(layer_sl4.get_output_shape())
    layer_bl5 = bb.BinaryLut6.create(layer_sl5.get_output_shape())

    cnv0_sub = bb.Sequential.create()
    cnv0_sub.add(layer_cnv0_bl0)
    cnv0_sub.add(layer_cnv0_bl1)

    cnv1_sub = bb.Sequential.create()
    cnv1_sub.add(layer_cnv1_bl0)
    cnv1_sub.add(layer_cnv1_bl1)

    cnv2_sub = bb.Sequential.create()
    cnv2_sub.add(layer_cnv2_bl0)
    cnv2_sub.add(layer_cnv2_bl1)

    cnv3_sub = bb.Sequential.create()
    cnv3_sub.add(layer_cnv3_bl0)
    cnv3_sub.add(layer_cnv3_bl1)

    cnv4_sub = bb.Sequential.create()
    cnv4_sub.add(layer_bl4)
    cnv4_sub.add(layer_bl5)

    cnv0 = bb.LoweringConvolution.create(cnv0_sub, 3, 3)
    cnv1 = bb.LoweringConvolution.create(cnv1_sub, 3, 3)
    pol0 = bb.MaxPooling.create(2, 2)

    cnv2 = bb.LoweringConvolution.create(cnv2_sub, 3, 3)
    cnv3 = bb.LoweringConvolution.create(cnv3_sub, 3, 3)
    pol1 = bb.MaxPooling.create(2, 2)

    cnv4 = bb.LoweringConvolution.create(cnv4_sub, 4, 4)
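    # (cnv4 re-expresses the dense tail layer_bl4/layer_bl5 as a single 4x4
    # convolution: two 3x3 convolution pairs and two 2x2 poolings shrink the
    # 28x28 input to a 4x4 feature map, which this stage collapses to 1x1)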

    lut_net = bb.Sequential.create()
    lut_net.add(cnv0)
    lut_net.add(cnv1)
    lut_net.add(pol0)
    lut_net.add(cnv2)
    lut_net.add(cnv3)
    lut_net.add(pol1)
    lut_net.add(cnv4)

    # evaluate network
    eval_net = bb.Sequential.create()
    eval_net.add(
        bb.BinaryModulation.create(
            lut_net, inference_modulation_size=inference_modulation_size))
    eval_net.add(bb.Reduce.create(td.t_shape))

    # set input shape
    eval_net.set_input_shape(td.x_shape)

    # parameter copy
    print('parameter copy to binary LUT-Network')
    layer_cnv0_bl0.import_parameter(layer_cnv0_sl0)
    layer_cnv0_bl1.import_parameter(layer_cnv0_sl1)
    layer_cnv1_bl0.import_parameter(layer_cnv1_sl0)
    layer_cnv1_bl1.import_parameter(layer_cnv1_sl1)
    layer_cnv2_bl0.import_parameter(layer_cnv2_sl0)
    layer_cnv2_bl1.import_parameter(layer_cnv2_sl1)
    layer_cnv3_bl0.import_parameter(layer_cnv3_sl0)
    layer_cnv3_bl1.import_parameter(layer_cnv3_sl1)
    layer_bl4.import_parameter(layer_sl4)
    layer_bl5.import_parameter(layer_sl5)

    # evaluate network
    print('evaluate LUT-Network')
    lut_runner = bb.Runner(eval_net, "mnist-binary-lut6-cnn",
                           bb.LossSoftmaxCrossEntropy.create(),
                           bb.MetricsCategoricalAccuracy.create())
    lut_runner.evaluation(td, mini_batch_size=mini_batch)

    # write Verilog
    print('write verilog file')
    with open('MnistLutCnn.v', 'w') as f:
        f.write('`timescale 1ns / 1ps\n\n')
        f.write(
            bb.get_verilog_axi4s_from_lut_cnn('MnistLutCnnCnv0',
                                              [cnv0, cnv1, pol0]))
        f.write(
            bb.get_verilog_axi4s_from_lut_cnn('MnistLutCnnCnv1',
                                              [cnv2, cnv3, pol1]))
        f.write(bb.get_verilog_axi4s_from_lut_cnn('MnistLutCnnCnv2', [cnv4]))
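        # (the CNN is written out as three separate AXI4-Stream modules so
        # each stage can be instantiated and pipelined independently)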

Example 6

def main():
    epoch = 4
    mini_batch = 32
    training_modulation_size = 7
    test_modulation_size = 7

    # load MNIST data
    td = bb.load_mnist()

    # create layer
    layer_sl0 = bb.SparseLut6.create([1024])
    layer_sl1 = bb.SparseLut6.create([480])
    layer_sl2 = bb.SparseLut6.create([70])

    # create network
    main_net = bb.Sequential.create()
    main_net.add(layer_sl0)
    main_net.add(layer_sl1)
    main_net.add(layer_sl2)

    # wrapping with binary modulator
    net = bb.Sequential.create()
    net.add(
        bb.BinaryModulation.create(
            main_net, training_modulation_size=training_modulation_size))
    net.add(bb.Reduce.create(td['t_shape']))
    net.set_input_shape(td['x_shape'])

    print(net.get_info())

    loss = bb.LossSoftmaxCrossEntropy.create()
    metrics = bb.MetricsCategoricalAccuracy.create()
    optimizer = bb.OptimizerAdam.create()

    optimizer.set_variables(net.get_parameters(), net.get_gradients())

    batch_size = len(td['x_train'])
    print('batch_size =', batch_size)

    runner = bb.Runner(net, "mnist-sparse-lut6-simple", loss, metrics,
                       optimizer)
    runner.fitting(td, epoch_size=epoch, mini_batch_size=mini_batch)

    # LUT-network
    layer_bl0 = bb.BinaryLut6Bit.create(layer_sl0.get_output_shape())
    layer_bl1 = bb.BinaryLut6Bit.create(layer_sl1.get_output_shape())
    layer_bl2 = bb.BinaryLut6Bit.create(layer_sl2.get_output_shape())

    lut_net = bb.Sequential.create()
    lut_net.add(layer_bl0)
    lut_net.add(layer_bl1)
    lut_net.add(layer_bl2)

    # evaluation network
    eval_net = bb.Sequential.create()
    eval_net.add(
        bb.BinaryModulationBit.create(
            lut_net, inference_modulation_size=test_modulation_size))
    eval_net.add(bb.Reduce.create(td['t_shape']))

    # set input shape
    eval_net.set_input_shape(td['x_shape'])

    # import table
    print('parameter copy to binary LUT-Network')
    layer_bl0.import_parameter(layer_sl0)
    layer_bl1.import_parameter(layer_sl1)
    layer_bl2.import_parameter(layer_sl2)

    # evaluation
    lut_runner = bb.Runner(eval_net, "mnist-binary-lut6-simple",
                           bb.LossSoftmaxCrossEntropy.create(),
                           bb.MetricsCategoricalAccuracy.create())
    lut_runner.evaluation(td, mini_batch_size=mini_batch)

    # Verilog output
    with open('MnistLutSimple.v', 'w') as f:
        f.write('`timescale 1ns / 1ps\n\n')
        f.write(
            bb.make_verilog_from_lut_bit('MnistLutSimple',
                                         [layer_bl0, layer_bl1, layer_bl2]))

Example 7

def main():
    binary_mode = False
    epoch = 8
    mini_batch = 32
    training_modulation_size = 3
    inference_modulation_size = 3

    # load data
    td = bb.load_cifar10()

    batch_size = len(td['x_train'])
    print('batch_size =', batch_size)

    ############################
    # Learning
    ############################

    # create layer
    layer_cnv0_affine = bb.DenseAffine.create([32])
    layer_cnv0_batchnorm = bb.BatchNormalization.create()
    layer_cnv0_activation = bb.ReLU.create()

    layer_cnv1_affine = bb.DenseAffine.create([32])
    layer_cnv1_batchnorm = bb.BatchNormalization.create()
    layer_cnv1_activation = bb.ReLU.create()

    layer_cnv2_affine = bb.DenseAffine.create([64])
    layer_cnv2_batchnorm = bb.BatchNormalization.create()
    layer_cnv2_activation = bb.ReLU.create()

    layer_cnv3_affine = bb.DenseAffine.create([64])
    layer_cnv3_batchnorm = bb.BatchNormalization.create()
    layer_cnv3_activation = bb.ReLU.create()

    # main network
    cnv0_sub = bb.Sequential.create()
    cnv0_sub.add(layer_cnv0_affine)
    cnv0_sub.add(layer_cnv0_batchnorm)
    cnv0_sub.add(layer_cnv0_activation)

    cnv1_sub = bb.Sequential.create()
    cnv1_sub.add(layer_cnv1_affine)
    cnv1_sub.add(layer_cnv1_batchnorm)
    cnv1_sub.add(layer_cnv1_activation)

    cnv2_sub = bb.Sequential.create()
    cnv2_sub.add(layer_cnv2_affine)
    cnv2_sub.add(layer_cnv2_batchnorm)
    cnv2_sub.add(layer_cnv2_activation)

    cnv3_sub = bb.Sequential.create()
    cnv3_sub.add(layer_cnv3_affine)
    cnv3_sub.add(layer_cnv3_batchnorm)
    cnv3_sub.add(layer_cnv3_activation)

    main_net = bb.Sequential.create()
    main_net.add(bb.LoweringConvolution.create(cnv0_sub, 3, 3))
    main_net.add(bb.LoweringConvolution.create(cnv1_sub, 3, 3))
    main_net.add(bb.MaxPooling.create(2, 2))
    main_net.add(bb.LoweringConvolution.create(cnv2_sub, 3, 3))
    main_net.add(bb.LoweringConvolution.create(cnv3_sub, 3, 3))
    main_net.add(bb.MaxPooling.create(2, 2))
    main_net.add(bb.DenseAffine.create([512]))
    main_net.add(bb.BatchNormalization.create())
    main_net.add(bb.ReLU.create())
    main_net.add(bb.DenseAffine.create([10]))
    if binary_mode:
        main_net.add(bb.ReLU.create())

    # wrapping with binary modulator
    net = bb.Sequential.create()
    net.add(
        bb.BinaryModulation.create(
            main_net, training_modulation_size=training_modulation_size))
    net.add(bb.Reduce.create(td['t_shape']))
    net.set_input_shape(td['x_shape'])

    # set binary mode
    if binary_mode:
        net.send_command("binary true")
    else:
        net.send_command("binary false")

    # print model information
    print(net.get_info())

    # learning
    print('\n[learning]')

    loss = bb.LossSoftmaxCrossEntropy.create()
    metrics = bb.MetricsCategoricalAccuracy.create()
    optimizer = bb.OptimizerAdam.create()
    optimizer.set_variables(net.get_parameters(), net.get_gradients())

    runner = bb.Runner(net, "cifar10-dense-cnn", loss, metrics, optimizer)
    runner.fitting(td,
                   epoch_size=epoch,
                   mini_batch_size=mini_batch,
                   file_read=True,
                   file_write=True)

Example 8

def main():
    epoch = 4
    mini_batch = 32
    training_modulation_size = 3
    inference_modulation_size = 3

    # load data
    td = bb.load_cifar10()

    batch_size = len(td['x_train'])
    print('batch_size =', batch_size)

    ############################
    # Learning
    ############################

    # create layer
    layer_cnv0_sl0 = bb.SparseLut6Bit.create([192])
    layer_cnv0_sl1 = bb.SparseLut6Bit.create([32])

    layer_cnv1_sl0 = bb.SparseLut6Bit.create([1152])
    layer_cnv1_sl1 = bb.SparseLut6Bit.create([192])
    layer_cnv1_sl2 = bb.SparseLut6Bit.create([32])

    layer_cnv2_sl0 = bb.SparseLut6Bit.create([2304])
    layer_cnv2_sl1 = bb.SparseLut6Bit.create([384])
    layer_cnv2_sl2 = bb.SparseLut6Bit.create([64])

    layer_cnv3_sl0 = bb.SparseLut6Bit.create([2304])  # = 384 * 6
    layer_cnv3_sl1 = bb.SparseLut6Bit.create([384])
    layer_cnv3_sl2 = bb.SparseLut6Bit.create([64])

    layer_sl4 = bb.SparseLut6Bit.create([18432])
    layer_sl5 = bb.SparseLut6Bit.create([3072])
    layer_sl6 = bb.SparseLut6Bit.create([512])

    layer_sl7 = bb.SparseLut6Bit.create([2160])
    layer_sl8 = bb.SparseLut6Bit.create([360])
    layer_sl9 = bb.SparseLut6Bit.create([60])
    layer_sl10 = bb.SparseLut6Bit.create([10])

    # main network
    cnv0_sub = bb.Sequential.create()
    cnv0_sub.add(layer_cnv0_sl0)
    cnv0_sub.add(layer_cnv0_sl1)

    cnv1_sub = bb.Sequential.create()
    cnv1_sub.add(layer_cnv1_sl0)
    cnv1_sub.add(layer_cnv1_sl1)
    cnv1_sub.add(layer_cnv1_sl2)

    cnv2_sub = bb.Sequential.create()
    cnv2_sub.add(layer_cnv2_sl0)
    cnv2_sub.add(layer_cnv2_sl1)
    cnv2_sub.add(layer_cnv2_sl2)

    cnv3_sub = bb.Sequential.create()
    cnv3_sub.add(layer_cnv3_sl0)
    cnv3_sub.add(layer_cnv3_sl1)
    cnv3_sub.add(layer_cnv3_sl2)

    main_net = bb.Sequential.create()
    main_net.add(bb.LoweringConvolutionBit.create(cnv0_sub, 3, 3))
    main_net.add(bb.LoweringConvolutionBit.create(cnv1_sub, 3, 3))
    main_net.add(bb.MaxPoolingBit.create(2, 2))
    main_net.add(bb.LoweringConvolutionBit.create(cnv2_sub, 3, 3))
    main_net.add(bb.LoweringConvolutionBit.create(cnv3_sub, 3, 3))
    main_net.add(bb.MaxPoolingBit.create(2, 2))
    main_net.add(layer_sl4)
    main_net.add(layer_sl5)
    main_net.add(layer_sl6)
    main_net.add(layer_sl7)
    main_net.add(layer_sl8)
    main_net.add(layer_sl9)
    main_net.add(layer_sl10)

    # wrapping with binary modulator
    net = bb.Sequential.create()
    net.add(
        bb.BinaryModulationBit.create(
            main_net, training_modulation_size=training_modulation_size))
    net.add(bb.Reduce.create(td['t_shape']))
    net.set_input_shape(td['x_shape'])

    # set binary mode
    net.send_command("binary true")

    # print model information
    print(net.get_info())

    # learning
    print('\n[learning]')

    loss = bb.LossSoftmaxCrossEntropy.create()
    metrics = bb.MetricsCategoricalAccuracy.create()
    optimizer = bb.OptimizerAdam.create()
    optimizer.set_variables(net.get_parameters(), net.get_gradients())

    runner = bb.Runner(net, "cifar10-sparse-lut6-cnn", loss, metrics,
                       optimizer)
    runner.fitting(td,
                   epoch_size=epoch,
                   mini_batch_size=mini_batch,
                   file_read=True,
                   file_write=True)

Example 9

# (excerpt: the convolutional front end, td and the config variables are
# defined in a part of the original script not shown)
net.add(bb.StochasticLut6.create([10 * 6 * 6]))
net.add(bb.StochasticBatchNormalization.create())
net.add(bb.StochasticLut6.create([10 * 6]))
net.add(bb.StochasticBatchNormalization.create())
net.add(bb.StochasticLut6.create([10]))

net.set_input_shape(td['x_shape'])

# disable binary mode (this example trains the stochastic LUTs in FP32)
net.send_command("binary false")
net.send_command("lut_binarize false")

# print model information
print(net.get_info())

# learning
print('\n[learning]')

loss = bb.LossSoftmaxCrossEntropy.create()
metrics = bb.MetricsCategoricalAccuracy.create()
optimizer = bb.OptimizerAdam.create()
optimizer.set_variables(net.get_parameters(), net.get_gradients())

runner = bb.Runner(net, "cifar10-fp32-sparse-lut-cnn", loss, metrics,
                   optimizer)
runner.fitting(td,
               epoch_size=epoch,
               mini_batch_size=mini_batch,
               file_read=file_read,
               file_write=file_write)

Example 10

# this example also uses NumPy, OpenCV and tqdm (imported at module level
# in the original script)
import numpy as np
import cv2
from tqdm import tqdm

def main():
    epoch = 8
    mini_batch = 32
    training_modulation_size = 7
    test_modulation_size = 7

    # load MNIST data
    td = bb.load_mnist()

    # set teaching signal: the autoencoder targets are the inputs themselves
    td['t_shape'] = td['x_shape']
    td['t_train'] = td['x_train']
    td['t_test'] = td['x_test']
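
    # make_image_block() is used below but defined elsewhere in the original
    # script; this is a minimal compatible sketch (an assumption, not the
    # original helper): tile 16 flattened 28x28 MNIST images into a 4x4 grid
    def make_image_block(data):
        imgs = np.asarray(data).reshape(-1, 28, 28)
        rows = [np.hstack(imgs[i:i + 4]) for i in range(0, 16, 4)]
        return np.vstack(rows)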

    # create layer
    layer_enc_sl0 = bb.SparseLut6.create([32 * 6 * 6 * 6])
    layer_enc_sl1 = bb.SparseLut6.create([32 * 6 * 6])
    layer_enc_sl2 = bb.SparseLut6.create([32 * 6])
    layer_enc_sl3 = bb.SparseLut6.create([32])

    layer_dec_sl2 = bb.SparseLut6.create([28 * 28 * 6 * 6])
    layer_dec_sl1 = bb.SparseLut6.create([28 * 28 * 6])
    layer_dec_sl0 = bb.SparseLut6.create([28 * 28], False)  # disable BatchNorm on the output layer

    # create network
    main_net = bb.Sequential.create()
    main_net.add(layer_enc_sl0)
    main_net.add(layer_enc_sl1)
    main_net.add(layer_enc_sl2)
    main_net.add(layer_enc_sl3)
    main_net.add(layer_dec_sl2)
    main_net.add(layer_dec_sl1)
    main_net.add(layer_dec_sl0)

    # wrapping with binary modulator
    net = bb.Sequential.create()
    net.add(
        bb.BinaryModulation.create(
            main_net, training_modulation_size=training_modulation_size))
    net.set_input_shape(td['x_shape'])

    print(net.get_info())

    loss = bb.LossMeanSquaredError.create()
    metrics = bb.MetricsMeanSquaredError.create()
    optimizer = bb.OptimizerAdam.create()

    optimizer.set_variables(net.get_parameters(), net.get_gradients())

    batch_size = len(td['x_train'])
    print('batch_size =', batch_size)

    # (the manual loop below is used in place of the usual Runner-based fitting)
    # runner = bb.Runner(net, "mnist-autoencoder-sparse-lut6-simple", loss, metrics, optimizer)
    # runner.fitting(td, epoch_size=epoch, mini_batch_size=mini_batch)

    result_img = None

    x_train = td['x_train']
    t_train = td['t_train']
    x_test = td['x_test']
    t_test = td['t_test']
    x_buf = bb.FrameBuffer()
    t_buf = bb.FrameBuffer()
    for epoch_num in range(epoch):
        # train
        for index in tqdm(range(0, batch_size, mini_batch)):
            mini_batch_size = min(mini_batch, batch_size - index)

            x_buf.resize(mini_batch_size, td['x_shape'], bb.TYPE_FP32)
            x_buf.set_data(x_train[index:index + mini_batch_size])

            y_buf = net.forward(x_buf)

            t_buf.resize(mini_batch_size, td['t_shape'], bb.TYPE_FP32)
            t_buf.set_data(t_train[index:index + mini_batch_size])

            dy_buf = loss.calculate_loss(y_buf, t_buf, mini_batch_size)
            metrics.calculate_metrics(y_buf, t_buf)
            dx_buf = net.backward(dy_buf)

            optimizer.update()
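            # (calculate_loss() returns the gradient of the loss w.r.t. the
            # network output, which backward() propagates; update() then
            # applies the accumulated parameter gradients)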
            cv2.waitKey(1)

        print('loss =', loss.get_loss())
        print('metrics =', metrics.get_metrics())

        # test
        x_buf.resize(16, td['x_shape'], bb.TYPE_FP32)
        x_buf.set_data(x_test[0:16])
        y_buf = net.forward(x_buf)

        if result_img is None:
            x_img = make_image_block(x_buf.get_data())
            cv2.imwrite('mnist-autoencoder-sparse-lut6-simple_x.png',
                        x_img * 255)
            result_img = x_img

        y_img = make_image_block(y_buf.get_data())
        cv2.imwrite('mnist-autoencoder-sparse-lut6-simple_%d.png' % epoch_num,
                    y_img * 255)
        result_img = np.vstack((result_img, y_img))
        cv2.imshow('result_img', result_img)
        cv2.waitKey(1)

    cv2.destroyAllWindows()
    cv2.imwrite("mnist-autoencoder-sparse-lut6-simple.png", result_img * 255)

    # LUT-network
    layer_enc_bl0 = bb.BinaryLut6Bit.create(layer_enc_sl0.get_output_shape())
    layer_enc_bl1 = bb.BinaryLut6Bit.create(layer_enc_sl1.get_output_shape())
    layer_enc_bl2 = bb.BinaryLut6Bit.create(layer_enc_sl2.get_output_shape())
    layer_enc_bl3 = bb.BinaryLut6Bit.create(layer_enc_sl3.get_output_shape())
    layer_dec_bl2 = bb.BinaryLut6Bit.create(layer_dec_sl2.get_output_shape())
    layer_dec_bl1 = bb.BinaryLut6Bit.create(layer_dec_sl1.get_output_shape())
    layer_dec_bl0 = bb.BinaryLut6Bit.create(layer_dec_sl0.get_output_shape())

    lut_net = bb.Sequential.create()
    lut_net.add(layer_enc_bl0)
    lut_net.add(layer_enc_bl1)
    lut_net.add(layer_enc_bl2)
    lut_net.add(layer_enc_bl3)
    lut_net.add(layer_dec_bl2)
    lut_net.add(layer_dec_bl1)
    lut_net.add(layer_dec_bl0)

    # evaluation network
    eval_net = bb.Sequential.create()
    eval_net.add(
        bb.BinaryModulationBit.create(
            lut_net, inference_modulation_size=test_modulation_size))
    eval_net.add(bb.Reduce.create(td['t_shape']))

    # set input shape
    eval_net.set_input_shape(td['x_shape'])

    # import table
    print('parameter copy to binary LUT-Network')
    layer_enc_bl0.import_parameter(layer_enc_sl0)
    layer_enc_bl1.import_parameter(layer_enc_sl1)
    layer_enc_bl2.import_parameter(layer_enc_sl2)
    layer_enc_bl3.import_parameter(layer_enc_sl3)
    layer_dec_bl2.import_parameter(layer_dec_sl2)
    layer_dec_bl1.import_parameter(layer_dec_sl1)
    layer_dec_bl0.import_parameter(layer_dec_sl0)

    # evaluation
    lut_runner = bb.Runner(eval_net, "mnist-autoencoder-binary-lut6-simple",
                           bb.LossMeanSquaredError.create(),
                           bb.MetricsMeanSquaredError.create())
    lut_runner.evaluation(td, mini_batch_size=mini_batch)

    # Verilog output
    with open('MnistAeLutSimple.v', 'w') as f:
        f.write('`timescale 1ns / 1ps\n\n')
        f.write(
            bb.make_verilog_from_lut_bit('MnistAeLutSimple', [
                layer_enc_bl0, layer_enc_bl1, layer_enc_bl2, layer_enc_bl3,
                layer_dec_bl2, layer_dec_bl1, layer_dec_bl0
            ]))