print('Building the CNN...')

# Prepare Theano variables for inputs and targets
input = T.tensor4('inputs')
target = T.matrix('targets')
LR = T.scalar('LR', dtype=theano.config.floatX)

cnn = lasagne.layers.InputLayer(shape=(None, 3, 32, 32), input_var=input)

# 128C3-128C3-P2
cnn = binary_net.Conv2DLayer(
    cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
    num_filters=128, filter_size=(3, 3), pad=1,
    nonlinearity=lasagne.nonlinearities.identity)
cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
cnn = binary_net.Conv2DLayer(
    cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
    num_filters=128,
    # the source snippet breaks off mid-call here; the remaining arguments
    # are inferred from the identical 128C3 call above
    filter_size=(3, 3), pad=1,
    nonlinearity=lasagne.nonlinearities.identity)
def genCnv(input, num_outputs, learning_parameters):
    # A function to generate the cnv network topology which matches the overlay for the Pynq board.
    # WARNING: If you change this file, it's likely the resultant weights will not fit on the Pynq overlay.
    stochastic = False
    binary = True
    H = 1
    activation = binary_net.binary_tanh_unit
    W_LR_scale = learning_parameters.W_LR_scale
    epsilon = learning_parameters.epsilon
    alpha = learning_parameters.alpha

    cnn = lasagne.layers.InputLayer(shape=(None, 3, 32, 32), input_var=input)

    # 64C3-64C3-P2
    cnn = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=64, filter_size=(3, 3), pad='valid', flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    cnn = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=64, filter_size=(3, 3), pad='valid', flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # 128C3-128C3-P2
    cnn = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=128, filter_size=(3, 3), pad='valid', flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    cnn = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=128, filter_size=(3, 3), pad='valid', flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # 256C3-256C3
    cnn = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=256, filter_size=(3, 3), pad='valid', flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    cnn = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=256, filter_size=(3, 3), pad='valid', flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    # print(cnn.output_shape)

    # 512FP-512FP-(num_outputs)FP
    cnn = binary_net.DenseLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        nonlinearity=lasagne.nonlinearities.identity, num_units=512)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    cnn = binary_net.DenseLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        nonlinearity=lasagne.nonlinearities.identity, num_units=512)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    cnn = binary_net.DenseLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        nonlinearity=lasagne.nonlinearities.identity, num_units=num_outputs)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    return cnn
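# Hedged usage sketch (not part of the source): how a training script would
# typically drive genCnv above. The LearningParameters container and its
# attribute values are assumptions; only the W_LR_scale/epsilon/alpha
# attribute names come from how genCnv reads its argument.
import theano.tensor as T
import lasagne


class LearningParameters(object):  # hypothetical stand-in
    W_LR_scale = "Glorot"
    epsilon = 1e-4
    alpha = .1


input = T.tensor4('inputs')
cnn = genCnv(input, num_outputs=10, learning_parameters=LearningParameters())
# Deterministic output expression, e.g. for inference/validation:
test_output = lasagne.layers.get_output(cnn, deterministic=True)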
def buildCNN(networkType, dataType, input, epsilon, alpha, activation,
             binary, stochastic, H, W_LR_scale, oneHot=True):
    if oneHot:
        print("identity")
        denseOut = lasagne.nonlinearities.identity
    else:
        print("softmax")
        denseOut = lasagne.nonlinearities.softmax

    if dataType == 'TCDTIMIT':
        nbClasses = 39
        cnn = lasagne.layers.InputLayer(shape=(None, 1, 120, 120), input_var=input)
    elif dataType == 'cifar10':
        nbClasses = 10
        cnn = lasagne.layers.InputLayer(shape=(None, 3, 32, 32), input_var=input)

    if networkType == 'google':
        # conv 1
        cnn = binary_net.Conv2DLayer(
            cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
            num_filters=128, filter_size=(3, 3), pad=1,
            nonlinearity=lasagne.nonlinearities.identity)
        cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        # conv 2
        cnn = binary_net.Conv2DLayer(
            cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
            num_filters=256, filter_size=(3, 3), stride=(2, 2), pad=1,
            nonlinearity=lasagne.nonlinearities.identity)
        cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        # conv 3
        cnn = binary_net.Conv2DLayer(
            cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
            num_filters=512, filter_size=(3, 3), pad=1,
            nonlinearity=lasagne.nonlinearities.identity)
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        # conv 4
        cnn = binary_net.Conv2DLayer(
            cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
            num_filters=512, filter_size=(3, 3), pad=1,
            nonlinearity=lasagne.nonlinearities.identity)
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        # conv 5
        cnn = binary_net.Conv2DLayer(
            cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
            num_filters=512, filter_size=(3, 3), pad=1,
            nonlinearity=lasagne.nonlinearities.identity)
        cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        # FC layer
        cnn = binary_net.DenseLayer(
            cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
            nonlinearity=denseOut,  # TODO was identity
            num_units=nbClasses)
        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    elif networkType == 'cifar10':
        # 128C3-128C3-P2
        cnn = binary_net.Conv2DLayer(
            cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
            num_filters=128, filter_size=(3, 3), pad=1,
            nonlinearity=lasagne.nonlinearities.identity)
        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
        cnn = binary_net.Conv2DLayer(
            cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
            num_filters=128, filter_size=(3, 3), pad=1,
            nonlinearity=lasagne.nonlinearities.identity)
        cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        # 256C3-256C3-P2
        cnn = binary_net.Conv2DLayer(
            cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
            num_filters=256, filter_size=(3, 3), pad=1,
            nonlinearity=lasagne.nonlinearities.identity)
        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
        cnn = binary_net.Conv2DLayer(
            cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
            num_filters=256, filter_size=(3, 3), pad=1,
            nonlinearity=lasagne.nonlinearities.identity)
        cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        # 512C3-512C3-P2
        cnn = binary_net.Conv2DLayer(
            cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
            num_filters=512, filter_size=(3, 3), pad=1,
            nonlinearity=lasagne.nonlinearities.identity)
        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
        cnn = binary_net.Conv2DLayer(
            cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
            num_filters=512, filter_size=(3, 3), pad=1,
            nonlinearity=lasagne.nonlinearities.identity)
        cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
        # print(cnn.output_shape)

        # 1024FP-1024FP-10FP
        cnn = binary_net.DenseLayer(
            cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
            nonlinearity=lasagne.nonlinearities.identity, num_units=1024)
        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
        cnn = binary_net.DenseLayer(
            cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
            nonlinearity=lasagne.nonlinearities.identity, num_units=1024)
        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
        cnn = binary_net.DenseLayer(
            cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
            nonlinearity=denseOut, num_units=nbClasses)
        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    return cnn
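# Hedged usage sketch (not part of the source): buildCNN dispatches on
# networkType/dataType. The parameter values below (epsilon, alpha, H,
# W_LR_scale) are plausible defaults taken from the other scripts in this
# section, not values confirmed for this function.
input = T.tensor4('inputs')
cnn = buildCNN(networkType='cifar10', dataType='cifar10', input=input,
               epsilon=1e-4, alpha=.1,
               activation=binary_net.binary_tanh_unit,
               binary=True, stochastic=False,
               H=1., W_LR_scale="Glorot",
               oneHot=True)  # identity output layer, suited to hinge loss on +/-1 targets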
def genCnv(input, num_outputs, learning_parameters):
    # A function to generate the cnv network topology which matches the overlay for the Pynq board.
    # WARNING: If you change this file, it's likely the resultant weights will not fit on the Pynq overlay.
    stochastic = False
    binary = True
    H = 1
    activation = binary_net.binary_tanh_unit
    W_LR_scale = learning_parameters.W_LR_scale
    epsilon = learning_parameters.epsilon
    alpha = learning_parameters.alpha

    # Encoder
    cnn = lasagne.layers.InputLayer(shape=(None, 1, 28, 28), input_var=input)

    # 1st Layer
    cnn = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=64, filter_size=(4, 4), pad='valid', stride=(2, 2),
        flip_filters=False, nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    cnn = lasagne.layers.DropoutLayer(cnn, p=0.2)
    print(cnn.output_shape)

    # 2nd Layer
    cnn = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=64, filter_size=(4, 4), pad='valid', stride=(2, 2),
        flip_filters=False, nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    cnn = lasagne.layers.DropoutLayer(cnn, p=0.2)
    print(cnn.output_shape)

    # 3rd Layer
    cnn = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=64, filter_size=(4, 4), pad='valid', stride=(1, 1),
        flip_filters=False, nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    cnn = lasagne.layers.DropoutLayer(cnn, p=0.2)
    print(cnn.output_shape)

    cnn = lasagne.layers.flatten(cnn)
    print(cnn.output_shape)

    # FC Layer
    cnn = binary_net.DenseLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        nonlinearity=lasagne.nonlinearities.identity, num_units=256)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    print(cnn.output_shape)

    # Decoder
    cnn = lasagne.layers.ReshapeLayer(cnn, shape=(-1, 64, 2, 2))
    print(cnn.output_shape)

    # 1st Deconv Layer
    cnn = binary_net.Deconv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=64, filter_size=(4, 4), crop='valid', stride=(2, 2),
        flip_filters=False, nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    print(cnn.output_shape)

    # 2nd Deconv Layer
    cnn = binary_net.Deconv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=64, filter_size=(4, 4), crop='valid', stride=(2, 2),
        flip_filters=False, nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    print(cnn.output_shape)

    # 3rd Deconv Layer
    cnn = binary_net.Deconv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=1, filter_size=(4, 4), crop='valid', stride=(2, 2),
        flip_filters=False, nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    print(cnn.output_shape)

    cnn = lasagne.layers.flatten(cnn)
    print(cnn.output_shape)

    # Last FC layer
    cnn = binary_net.DenseLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        nonlinearity=lasagne.nonlinearities.identity, num_units=num_outputs)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    print(cnn.output_shape)

    return cnn
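# Worked shape check for the autoencoder above (standard Lasagne formulas:
# 'valid' conv -> floor((in - k) / stride) + 1; crop='valid' deconv ->
# (in - 1) * stride + k):
#   encoder: 28 -> 13 -> 5 -> 2, flattened to 64 * 2 * 2 = 256, matching the
#            256-unit FC bottleneck and the (-1, 64, 2, 2) reshape
#   decoder: 2 -> 6 -> 14 -> 30, flattened to 1 * 30 * 30 = 900 features
#            feeding the final num_outputs-unit FC layer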
# Prepare Theano variables for inputs and targets
input = T.tensor4('inputs')
target = T.matrix('targets')
LR = T.scalar('LR', dtype=theano.config.floatX)

cnn = lasagne.layers.InputLayer(shape=(None, 3, 32, 32), input_var=input)

for i in range(len(sizes)):
    if i == 0:
        cnn = binary_net.Conv2DLayer(
            cnn, binary=binary, stochastic=stochastic, weight_prec=weight_prec,
            H=H, W_LR_scale=W_LR_scale,
            num_filters=sizes[0], max_fan_in=0, act_noise=act_noise,
            filter_size=(3, 3), pad=1,
            nonlinearity=lasagne.nonlinearities.identity)
    else:
        if layers[i] == 'c' or layers[i] == 'cm':
            cnn = Conv2DLayer_Fanin_Limited(
                cnn, binary=binary, stochastic=stochastic, weight_prec=weight_prec,
                H=H, W_LR_scale=W_LR_scale,
                # the source snippet breaks off mid-call here; the remaining
                # arguments are inferred to mirror the first-layer call
                num_filters=sizes[i], max_fan_in=max_fan_in, act_noise=act_noise,
                filter_size=(3, 3), pad=1,
                nonlinearity=lasagne.nonlinearities.identity)
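# Hedged reading of the fragment above (assumption, not from the source):
# `sizes` presumably lists per-layer filter counts and `layers` per-layer
# types, with 'c' a plain conv and 'cm' a conv followed by max-pooling,
# along the lines of:
sizes = [128, 128, 256, 256]
layers = ['c', 'cm', 'c', 'cm']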
def run(binary=False, noise=None, nalpha=0, result_path=None):
    # BN parameters
    batch_size = 128
    print("batch_size = " + str(batch_size))
    # alpha is the exponential moving average factor
    alpha = .1
    print("alpha = " + str(alpha))
    epsilon = 1e-4
    print("epsilon = " + str(epsilon))

    # Training parameters
    num_epochs = 150
    print("num_epochs = " + str(num_epochs))

    # Dropout parameters
    dropout_in = .2  # default: .2
    print("dropout_in = " + str(dropout_in))
    dropout_hidden = .5  # default: .5
    print("dropout_hidden = " + str(dropout_hidden))

    # BinaryOut
    if binary:
        activation = binary_net.binary_tanh_unit
        print("activation = binary_net.binary_tanh_unit")
    else:
        activation = lasagne.nonlinearities.tanh
        print("activation = lasagne.nonlinearities.tanh")

    # BinaryConnect
    print("binary = " + str(binary))
    stochastic = False
    print("stochastic = " + str(stochastic))
    # (-H,+H) are the two binary values
    # H = "Glorot"
    H = 1.
    print("H = " + str(H))
    # W_LR_scale = 1.
    W_LR_scale = "Glorot"  # "Glorot" means we are using the coefficients from Glorot's paper
    print("W_LR_scale = " + str(W_LR_scale))

    # Decaying LR
    LR_start = 0.005
    print("LR_start = " + str(LR_start))
    LR_fin = 0.0000005  # 0.0000003
    print("LR_fin = " + str(LR_fin))
    LR_decay = (LR_fin / LR_start)**(1. / num_epochs)
    print("LR_decay = " + str(LR_decay))
    # BTW, LR decay might be good for the BN moving average...

    train_set_size = 40000
    shuffle_parts = 1
    print("shuffle_parts = " + str(shuffle_parts))
    print("noise = " + str(noise))
    print("nalpha = " + str(nalpha))

    print('Loading CIFAR-10 dataset...')
    cifar = CifarReader("./data/cifar-10-batches-py/")
    train_X, train_y = cifar.get_train_data(n_samples=train_set_size,
                                            noise=noise, alpha=nalpha)
    valid_X, valid_y = cifar.get_validation_data()
    test_X, test_y = cifar.get_test_data()
    print("train_set_size = " + str(train_y.shape[0]))
    print("validation_set_size = " + str(valid_y.shape[0]))
    print("test_set_size = " + str(test_y.shape[0]))

    # Log output
    with open(result_path + "params.txt", "a+") as l:
        print("batch_size = " + str(batch_size), file=l)
        print("alpha = " + str(alpha), file=l)
        print("epsilon = " + str(epsilon), file=l)
        print("num_epochs = " + str(num_epochs), file=l)
        print("dropout_in = " + str(dropout_in), file=l)
        print("dropout_hidden = " + str(dropout_hidden), file=l)
        if binary:
            print("activation = binary_net.binary_tanh_unit", file=l)
        else:
            print("activation = lasagne.nonlinearities.tanh", file=l)
        print("binary = " + str(binary), file=l)
        print("stochastic = " + str(stochastic), file=l)
        print("H = " + str(H), file=l)
        print("W_LR_scale = " + str(W_LR_scale), file=l)
        print("LR_start = " + str(LR_start), file=l)
        print("LR_fin = " + str(LR_fin), file=l)
        print("LR_decay = " + str(LR_decay), file=l)
        print("shuffle_parts = " + str(shuffle_parts), file=l)
        print("noise = " + str(noise), file=l)
        print("nalpha = " + str(nalpha), file=l)
        print("train_set_size = " + str(train_y.shape[0]), file=l)
        print("validation_set_size = " + str(valid_y.shape[0]), file=l)
        print("test_set_size = " + str(test_y.shape[0]), file=l)

    # bc01 format
    # Inputs in the range [-1,+1]
    # print("Inputs in the range [-1,+1]")
    train_X = np.reshape(np.subtract(np.multiply(2. / 255., train_X), 1.), (-1, 3, 32, 32))
    valid_X = np.reshape(np.subtract(np.multiply(2. / 255., valid_X), 1.), (-1, 3, 32, 32))
    test_X = np.reshape(np.subtract(np.multiply(2. / 255., test_X), 1.), (-1, 3, 32, 32))
    # flatten targets
    train_y = np.hstack(train_y)
    valid_y = np.hstack(valid_y)
    test_y = np.hstack(test_y)

    # Onehot the targets
    train_y = np.float32(np.eye(10)[train_y])
    valid_y = np.float32(np.eye(10)[valid_y])
    test_y = np.float32(np.eye(10)[test_y])

    # for hinge loss
    train_y = 2 * train_y - 1.
    valid_y = 2 * valid_y - 1.
    test_y = 2 * test_y - 1.

    print('Building the CNN...')

    # Prepare Theano variables for inputs and targets
    input = T.tensor4('inputs')
    target = T.matrix('targets')
    LR = T.scalar('LR', dtype=theano.config.floatX)

    cnn = lasagne.layers.InputLayer(shape=(None, 3, 32, 32), input_var=input)
    cnn = lasagne.layers.DropoutLayer(cnn, p=dropout_in)

    # 32C3-64C3-P2
    cnn = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=32, filter_size=(3, 3), pad=1,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    cnn = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=64, filter_size=(3, 3), pad=1,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    cnn = lasagne.layers.DropoutLayer(cnn, p=dropout_hidden)

    # 128FP-10FP
    cnn = binary_net.DenseLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        nonlinearity=lasagne.nonlinearities.identity, num_units=128)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    cnn = lasagne.layers.DropoutLayer(cnn, p=dropout_hidden)
    cnn = binary_net.DenseLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        nonlinearity=lasagne.nonlinearities.identity, num_units=10)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=lasagne.nonlinearities.softmax)

    train_output = lasagne.layers.get_output(cnn, deterministic=False)

    # squared hinge loss
    loss = T.mean(T.sqr(T.maximum(0., 1. - target * train_output)))

    if binary:
        # W updates
        W = lasagne.layers.get_all_params(cnn, binary=True)
        W_grads = binary_net.compute_grads(loss, cnn)
        updates = lasagne.updates.adam(loss_or_grads=W_grads, params=W, learning_rate=LR)
        updates = binary_net.clipping_scaling(updates, cnn)
        # other parameters updates
        params = lasagne.layers.get_all_params(cnn, trainable=True, binary=False)
        updates.update(lasagne.updates.adam(loss_or_grads=loss, params=params, learning_rate=LR))
    else:
        params = lasagne.layers.get_all_params(cnn, trainable=True)
        updates = lasagne.updates.adam(loss_or_grads=loss, params=params, learning_rate=LR)

    test_output = lasagne.layers.get_output(cnn, deterministic=True)
    test_loss = T.mean(T.sqr(T.maximum(0., 1. - target * test_output)))
    test_err = T.mean(T.neq(T.argmax(test_output, axis=1), T.argmax(target, axis=1)),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving the updates dictionary)
    # and returning the corresponding training loss:
    train_fn = theano.function([input, target, LR], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input, target], [test_loss, test_err])

    print('Training...')
    binary_net.train(train_fn, val_fn, cnn,
                     batch_size, LR_start, LR_decay, num_epochs,
                     train_X, train_y,
                     valid_X, valid_y,
                     test_X, test_y,
                     shuffle_parts=shuffle_parts,
                     result_path=result_path)
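# Worked check of the decay schedule in run() above (pure arithmetic):
# LR_decay = (LR_fin / LR_start) ** (1 / num_epochs) is the per-epoch
# multiplier such that LR_start * LR_decay ** num_epochs == LR_fin.
# A minimal self-contained sketch of how binary_net.train presumably applies it:
LR_start, LR_fin, num_epochs = 0.005, 0.0000005, 150
LR_decay = (LR_fin / LR_start) ** (1. / num_epochs)  # ~0.9405 here
LR = LR_start
for epoch in range(num_epochs):
    # ... one pass over the training set at learning rate LR ...
    LR *= LR_decay  # after the last epoch, LR == LR_fin (up to float error)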
test_set.y = 2 * test_set.y - 1.

print('Building the CNN...')

# Prepare Theano variables for inputs and targets
input = T.tensor4('inputs')
target = T.matrix('targets')
LR = T.scalar('LR', dtype=theano.config.floatX)

cnn = lasagne.layers.InputLayer(shape=(None, 1, 128, 128), input_var=input)

# 6C5-P2
cnn = binary_net.Conv2DLayer(
    cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
    num_filters=6, filter_size=(5, 5),
    nonlinearity=lasagne.nonlinearities.identity)
cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

# 16C5-P2
cnn = binary_net.Conv2DLayer(
    cnn, binary=binary, stochastic=stochastic, H=H,
    # the source snippet breaks off mid-call here; the remaining arguments
    # are inferred from the 6C5 layer above, with 16 filters per the comment
    W_LR_scale=W_LR_scale,
    num_filters=16, filter_size=(5, 5),
    nonlinearity=lasagne.nonlinearities.identity)
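# Shape bookkeeping for the fragment above ('valid' convolutions):
#   6C5-P2:  128 -> 124 after the 5x5 conv, -> 62 after the 2x2 max-pool
#   16C5-P2: 62 -> 58 after the 5x5 conv, -> 29 after its 2x2 max-pool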
train_set.X = np.concatenate((train_set.X, x_train_flip), axis=0)
train_set.y = np.concatenate((train_set.y, y_train_flip), axis=0)

print('Building the CNN...')

# Prepare Theano variables for inputs and targets
input = T.tensor4('inputs')
target = T.matrix('targets')
LR = T.scalar('LR', dtype=theano.config.floatX)

cnn = lasagne.layers.InputLayer(shape=(None, 1, 28, 28), input_var=input)

# 1
cnn = binary_net.Conv2DLayer(
    cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
    num_filters=filters, filter_size=(2, 2), pad='valid',
    nonlinearity=lasagne.nonlinearities.identity)
cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
print(cnn.output_shape)

# 2
cnn = binary_net.Conv2DLayer(
    cnn,
    # the source snippet breaks off mid-call here; the arguments below are
    # inferred to mirror layer 1
    binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
    num_filters=filters, filter_size=(2, 2), pad='valid',
    nonlinearity=lasagne.nonlinearities.identity)
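# Shape bookkeeping for layer 1 above: 28 -> 27 after the 2x2 'valid' conv,
# -> 13 after the 2x2 max-pool (Lasagne's MaxPool2DLayer floors odd sizes by
# default), which is what the print(cnn.output_shape) call reports.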
def genCnv(input, num_outputs, learning_parameters):
    # A function to generate the cnv network topology which matches the overlay for the Pynq board.
    # WARNING: If you change this file, it's likely the resultant weights will not fit on the Pynq overlay.
    stochastic = False
    binary = True
    H = 1
    activation = binary_net.binary_tanh_unit
    W_LR_scale = learning_parameters.W_LR_scale
    epsilon = learning_parameters.epsilon
    alpha = learning_parameters.alpha

    out_layers = []
    inp = lasagne.layers.InputLayer(shape=(None, 1, 28, 28), input_var=input)

    # first conv
    cnn = binary_net.Conv2DLayer(
        inp, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=16, filter_size=(3, 3), pad='same', flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # 1x1 conv layer
    cnn_1x1 = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=32, filter_size=(1, 1), pad='valid', flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn_1x1 = lasagne.layers.MaxPool2DLayer(cnn_1x1, pool_size=(2, 2))
    cnn_1x1 = lasagne.layers.BatchNormLayer(cnn_1x1, epsilon=epsilon, alpha=alpha)
    cnn_1x1 = lasagne.layers.NonlinearityLayer(cnn_1x1, nonlinearity=activation)
    out_layers.append(cnn_1x1)

    # 3x3 conv layer
    cnn_3x3 = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=32, filter_size=(3, 3), pad='same', flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn_3x3 = lasagne.layers.MaxPool2DLayer(cnn_3x3, pool_size=(2, 2))
    cnn_3x3 = lasagne.layers.BatchNormLayer(cnn_3x3, epsilon=epsilon, alpha=alpha)
    cnn_3x3 = lasagne.layers.NonlinearityLayer(cnn_3x3, nonlinearity=activation)
    out_layers.append(cnn_3x3)

    # 5x5 conv layer
    cnn_5x5 = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=32, filter_size=(5, 5), pad='same', flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn_5x5 = lasagne.layers.MaxPool2DLayer(cnn_5x5, pool_size=(2, 2))
    cnn_5x5 = lasagne.layers.BatchNormLayer(cnn_5x5, epsilon=epsilon, alpha=alpha)
    cnn_5x5 = lasagne.layers.NonlinearityLayer(cnn_5x5, nonlinearity=activation)
    out_layers.append(cnn_5x5)

    cnn = lasagne.layers.concat(out_layers)

    # FC layer 1
    cnn = binary_net.DenseLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        nonlinearity=lasagne.nonlinearities.identity, num_units=512)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # FC layer 2
    cnn = binary_net.DenseLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        nonlinearity=lasagne.nonlinearities.identity, num_units=num_outputs)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    return cnn
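# Shape bookkeeping for the multi-branch genCnv above: the first conv+pool
# yields 14x14 maps; each branch preserves 14x14 ('valid' 1x1 and 'same'
# 3x3/5x5) and pools to 7x7 with 32 filters, so the concat layer outputs
# (batch, 96, 7, 7) and FC layer 1 sees 96 * 7 * 7 = 4704 inputs.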
def build_net(input, binary, stochastic=False, H=1.0, W_LR_scale="Glorot",
              activation=binary_net.binary_tanh_unit, epsilon=1e-4, alpha=.1,
              patch_size=32, channels=3, num_filters=256):
    cnn = lasagne.layers.InputLayer(
        shape=(None, channels, patch_size, patch_size),
        input_var=input)

    # 1
    cnn = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=num_filters, filter_size=(2, 2), pad='valid',
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    print(cnn.output_shape)

    # 2
    cnn = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=num_filters, filter_size=(2, 2), pad='valid',
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    print(cnn.output_shape)

    # 3
    cnn = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=num_filters, filter_size=(2, 2), pad='valid',
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    print(cnn.output_shape)

    # 4
    cnn = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=num_filters, filter_size=(2, 2), pad='valid',
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    print(cnn.output_shape)

    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
    print(cnn.output_shape)

    # 5
    cnn = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=num_filters, filter_size=(2, 2), pad='valid',
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    print(cnn.output_shape)

    # 6
    cnn = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=num_filters, filter_size=(2, 2), pad='valid',
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    print(cnn.output_shape)

    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
    print(cnn.output_shape)

    # 7
    cnn = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=num_filters, filter_size=(2, 2), pad='valid',
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    print(cnn.output_shape)

    # 8
    cnn = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=num_filters, filter_size=(2, 2), pad='valid',
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    print(cnn.output_shape)

    cnn = binary_net.DenseLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        nonlinearity=lasagne.nonlinearities.identity, num_units=2)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    return cnn
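# Shape bookkeeping for build_net with its defaults (patch_size=32; each 2x2
# 'valid' conv shrinks the map by 1, each 2x2 max-pool halves it):
#   32 -> 31 -> 30 -> 29 -> 28 -> pool -> 14 -> 13 -> 12 -> pool -> 6 -> 5 -> 4
# so the final DenseLayer sees num_filters * 4 * 4 inputs before its 2 units.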
def genCnv(input, num_outputs, learning_parameters):
    # A function to generate the cnv network topology which matches the overlay for the Pynq board.
    # WARNING: If you change this file, it's likely the resultant weights will not fit on the Pynq overlay.
    if num_outputs < 1 or num_outputs > 64:
        raise ValueError("num_outputs should be in the range of 1 to 64.")
    stochastic = False
    binary = True
    H = 1
    activation = binary_net.binary_tanh_unit
    W_LR_scale = learning_parameters.W_LR_scale
    epsilon = learning_parameters.epsilon
    alpha = learning_parameters.alpha

    cnn = lasagne.layers.InputLayer(shape=(None, 5, 64, 64), input_var=input)

    # conv maxpool
    cnn = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=64, filter_size=(3, 3), pad='same', flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))  # 32
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # conv maxpool
    cnn = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=64, filter_size=(3, 3), pad='same', flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))  # 16
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # conv conv maxpool
    cnn = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=128, filter_size=(3, 3), pad='same', flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    cnn = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=128, filter_size=(3, 3), pad='same', flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))  # 8
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    cnn = lasagne.layers.DropoutLayer(cnn, p=0.6)

    # conv maxpool
    cnn = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=256, filter_size=(3, 3), pad='same', flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))  # 4
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # conv maxpool
    cnn = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=256, filter_size=(3, 3), pad='same', flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))  # 2
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    cnn = lasagne.layers.DropoutLayer(cnn, p=0.6)
    print(cnn.output_shape)

    # FC 1
    cnn = binary_net.DenseLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        nonlinearity=lasagne.nonlinearities.identity, num_units=512)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # FC 2
    cnn = binary_net.DenseLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        nonlinearity=lasagne.nonlinearities.identity, num_units=512)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # output
    cnn = binary_net.DenseLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        nonlinearity=lasagne.nonlinearities.identity, num_units=num_outputs)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    return cnn
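# Shape bookkeeping for the genCnv above: every conv uses pad='same', so only
# the five 2x2 max-pools shrink the 64x64 input, matching the inline
# 32/16/8/4/2 comments; FC 1 therefore sees 256 * 2 * 2 = 1024 inputs ahead
# of its 512 units.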
def genCnv(input, num_outputs, learning_parameters):
    # A function to generate the cnv network topology which matches the overlay for the Pynq board.
    # WARNING: If you change this file, it's likely the resultant weights will not fit on the Pynq overlay.
    stochastic = False
    binary = True
    H = 1
    activation = binary_net.binary_tanh_unit
    W_LR_scale = learning_parameters.W_LR_scale
    epsilon = learning_parameters.epsilon
    alpha = learning_parameters.alpha

    inp = lasagne.layers.InputLayer(shape=(None, 1, 28, 28), input_var=input)

    # first conv
    cnn = binary_net.Conv2DLayer(
        inp, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=64, filter_size=(3, 3), pad='same', flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    residual = cnn

    # conv 1 in Res block
    cnn = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=64, filter_size=(3, 3), pad='same', flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # special conv 3 in Res block (summed with the residual below)
    cnn = binary_net.Conv2DLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        num_filters=64, filter_size=(3, 3), pad='same', flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)
    cnn = lasagne.layers.ElemwiseSumLayer([residual, cnn], coeffs=1)
    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # FC layer 1
    cnn = binary_net.DenseLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        nonlinearity=lasagne.nonlinearities.identity, num_units=512)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # FC layer 2
    cnn = binary_net.DenseLayer(
        cnn, binary=binary, stochastic=stochastic, H=H, W_LR_scale=W_LR_scale,
        nonlinearity=lasagne.nonlinearities.identity, num_units=num_outputs)
    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    return cnn
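# Note on the residual merge above: ElemwiseSumLayer requires all of its
# inputs to share one output shape. That holds here because both convs inside
# the block keep pad='same' and 64 filters, so `residual` and `cnn` are each
# (batch, 64, 14, 14) at the sum (28 -> 14 after the first 2x2 max-pool).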