def build_network(hidden_dim, n_layers, activation, incl_prob=None, batchnm=True):
    """Build a fully-connected model with optional batch-norm and dropout.

    Args:
        hidden_dim: width of every hidden layer.
        n_layers: total number of hidden affine layers.
        activation: activation layer instance inserted after each hidden affine
            (the same instance is reused at every position).
        incl_prob: if not None, a DropoutLayer with this inclusion probability
            is placed before every hidden-to-hidden affine layer.
        batchnm: if True, a BatchNormalizationLayer precedes each activation.

    Returns:
        MultipleLayerModel wrapping the assembled layer list.

    NOTE(review): relies on module-level `rng`, `input_dim` and `output_dim`
    being defined before this function is called.
    """
    weights_init = GlorotUniformInit(rng=rng)
    biases_init = ConstantInit(0.)
    # Post-affine stack appended after every hidden affine layer:
    # optional batch-norm, then the activation.
    if batchnm:
        act = [BatchNormalizationLayer(hidden_dim), activation]
    else:
        act = [activation]
    l = [AffineLayer(input_dim, hidden_dim, weights_init, biases_init)] + act
    for _ in range(n_layers - 1):
        if incl_prob is not None:
            l += [DropoutLayer(rng=rng, incl_prob=incl_prob),
                  AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init)] + act
        else:
            l += [AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init)] + act
    # Final affine maps to output_dim with no activation — presumably the
    # softmax lives in the error function (CrossEntropySoftmaxError is
    # imported elsewhere in this file).
    l += [AffineLayer(hidden_dim, output_dim, weights_init, biases_init)]
    return MultipleLayerModel(l)
def build_network(hidden_dim, n_layers, activation):
    """Construct a plain fully-connected model.

    Stacks `n_layers` affine+activation pairs (the same activation instance is
    reused throughout) and finishes with an affine layer mapping to the
    module-level `output_dim`. Also depends on module-level `rng` and
    `input_dim`.
    """
    w_init = GlorotUniformInit(rng=rng)
    b_init = ConstantInit(0.)
    layers = [AffineLayer(input_dim, hidden_dim, w_init, b_init), activation]
    for _ in range(n_layers - 1):
        layers.append(AffineLayer(hidden_dim, hidden_dim, w_init, b_init))
        layers.append(activation)
    layers.append(AffineLayer(hidden_dim, output_dim, w_init, b_init))
    return MultipleLayerModel(layers)
def build_network(hidden_dim, n_layers, activation, incl_prob=None):
    """Fully-connected model with optional dropout before hidden affines.

    When `incl_prob` is given, a DropoutLayer with that inclusion probability
    precedes every hidden-to-hidden affine layer. The same activation instance
    follows each hidden affine. Depends on module-level `rng`, `input_dim`
    and `output_dim`.
    """
    w_init = GlorotUniformInit(rng=rng)
    b_init = ConstantInit(0.)
    layers = [AffineLayer(input_dim, hidden_dim, w_init, b_init), activation]
    for _ in range(n_layers - 1):
        # Dropout (if enabled) goes in front of the affine transform.
        if incl_prob is not None:
            layers.append(DropoutLayer(rng=rng, incl_prob=incl_prob))
        layers.append(AffineLayer(hidden_dim, hidden_dim, w_init, b_init))
        layers.append(activation)
    layers.append(AffineLayer(hidden_dim, output_dim, w_init, b_init))
    return MultipleLayerModel(layers)
from mlp.layers import AffineLayer, SoftmaxLayer, SigmoidLayer, ReluLayer, LeakyReluLayer, ELULayer, SELULayer, DropoutLayer, BatchNormalizationLayer, ConvolutionalLayer, MaxPoolingLayer, ReshapeLayer
from mlp.errors import CrossEntropySoftmaxError
from mlp.models import MultipleLayerModel
from mlp.initialisers import ConstantInit, GlorotUniformInit
from mlp.learning_rules import GradientDescentLearningRule, Adam, RMSProp
from mlp.optimisers import Optimiser

# Setup hyperparameters for the training runs below.
learning_rate = 0.01
num_epochs = 100
stats_interval = 1
# 784 = flattened 28x28 input images; 47 output classes
# (presumably EMNIST Balanced -- TODO confirm against the data provider).
input_dim, output_dim = 784, 47

# NOTE(review): relies on a module-level `rng` defined elsewhere in the file.
weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)

# 1: convolutional model.
convmodel = MultipleLayerModel([
    # Reshape flat 784-vector into a single-channel 28x28 image.
    ReshapeLayer((1, 28, 28)),
    # Assumed signature: (in_channels, out_channels, in_h, in_w, kernel_h,
    # kernel_w) -- TODO confirm against mlp.layers.ConvolutionalLayer.
    # If so: 28x28 with 5x5 kernels -> 24x24 feature maps, 5 channels.
    ConvolutionalLayer(1, 5, 28, 28, 5, 5),
    ReluLayer(),
    # 2x2 pooling: 24x24 -> 12x12, consistent with the 12*12*5 below.
    MaxPoolingLayer(2),
    # Flatten back to a vector for the affine layers.
    ReshapeLayer(),
    AffineLayer(12 * 12 * 5, 300, weights_init, biases_init),
    ReluLayer(),
    # Final affine to class scores; softmax is applied by the error function.
    AffineLayer(300, output_dim, weights_init, biases_init)
])
# 2
# Training-run configuration. NOTE(review): depends on names defined
# elsewhere in the file: `logging`, `batch_size`, `rng`, `learning_rate`,
# `MNISTDataProvider`, `MomentumLearningRule`, `L1Penalty`.
mom_coeff = 0.9
weights_init_gain = 0.5
biases_init = 0.  # scalar value; rebound to a ConstantInit object below

# Set up a logger object to print info about the training run to stdout
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# Replace (not append to) existing handlers so repeated runs don't
# duplicate log output.
logger.handlers = [logging.StreamHandler()]

# Create data provider objects for the MNIST data set
train_data = MNISTDataProvider('train', batch_size, rng=rng)
valid_data = MNISTDataProvider('valid', batch_size, rng=rng)

# 784 = flattened 28x28 images, 10 digit classes, 100 hidden units.
input_dim, output_dim, hidden_dim = 784, 10, 100

weights_init = GlorotUniformInit(weights_init_gain, rng)
biases_init = ConstantInit(biases_init)  # wraps the scalar set above
error = CrossEntropySoftmaxError()
learning_rule = MomentumLearningRule(learning_rate, mom_coeff)
# Accuracy monitor: fraction of samples where the predicted class
# (argmax over outputs) matches the target's argmax.
data_monitors = {'acc': lambda y, t: (y.argmax(-1) == t.argmax(-1)).mean()}

# L1(1e-3,1e-4,1e-5,1e-6)

# In[4]:

# Penalty sweep: baseline (None) plus L1 penalties over four strengths.
weights_penalties = [
    None,
    L1Penalty(1e-3),
    L1Penalty(1e-4),
    L1Penalty(1e-5),
    L1Penalty(1e-6)
]