# Fully-connected network builder with optional batch normalisation and dropout.
def build_network(hidden_dim, n_layers, activation, incl_prob=None, batchnm=True):
    weights_init = GlorotUniformInit(rng=rng)
    biases_init = ConstantInit(0.)
    if batchnm:
        act = [BatchNormalizationLayer(hidden_dim), activation]
        # act_in = [BatchNormalizationLayer(input_dim), activation]
    else:
        act = [activation]
        # act_in = [activation]
    l = [AffineLayer(input_dim, hidden_dim, weights_init, biases_init)] + act
    for i in range(n_layers - 1):
        if incl_prob is not None:
            l += [DropoutLayer(rng=rng, incl_prob=incl_prob),
                  AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init)] + act
        else:
            l += [AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init)] + act
    l += [AffineLayer(hidden_dim, output_dim, weights_init, biases_init)]
    return MultipleLayerModel(l)
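# Illustrative call (a sketch, not from the original notebook): it assumes rng,
# input_dim, output_dim and the mlp.layers imports exist in earlier cells.
# With batchnm=True every affine layer except the output is followed by batch
# normalisation and then the activation; incl_prob places a dropout layer in
# front of each hidden-to-hidden affine layer.
bn_dropout_model = build_network(hidden_dim=100, n_layers=3,
                                 activation=ReluLayer(), incl_prob=0.8,
                                 batchnm=True)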
# Plain fully-connected network builder (no regularisation layers).
def build_network(hidden_dim, n_layers, activation):
    weights_init = GlorotUniformInit(rng=rng)
    biases_init = ConstantInit(0.)
    l = [AffineLayer(input_dim, hidden_dim, weights_init, biases_init), activation]
    for i in range(n_layers - 1):
        l += [AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init), activation]
    l += [AffineLayer(hidden_dim, output_dim, weights_init, biases_init)]
    return MultipleLayerModel(l)
# Fully-connected network builder with optional dropout only.
def build_network(hidden_dim, n_layers, activation, incl_prob=None):
    weights_init = GlorotUniformInit(rng=rng)
    biases_init = ConstantInit(0.)
    l = [AffineLayer(input_dim, hidden_dim, weights_init, biases_init), activation]
    for i in range(n_layers - 1):
        if incl_prob is not None:
            l += [DropoutLayer(rng=rng, incl_prob=incl_prob),
                  AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
                  activation]
        else:
            l += [AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
                  activation]
    l += [AffineLayer(hidden_dim, output_dim, weights_init, biases_init)]
    return MultipleLayerModel(l)
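# Sketch of how these builders might be used for a depth sweep (assumed usage,
# not from the original notebook); rng, input_dim, output_dim and ReluLayer are
# taken from the surrounding cells.
depth_sweep_models = {
    n_layers: build_network(hidden_dim=100, n_layers=n_layers,
                            activation=ReluLayer(), incl_prob=0.97)
    for n_layers in (2, 3, 4)
}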
# different experiments you run.
from mlp.layers import (AffineLayer, SoftmaxLayer, SigmoidLayer, ReluLayer,
                        LeakyReluLayer, ELULayer, SELULayer, DropoutLayer,
                        BatchNormalizationLayer, ConvolutionalLayer,
                        MaxPoolingLayer, ReshapeLayer)
from mlp.errors import CrossEntropySoftmaxError
from mlp.models import MultipleLayerModel
from mlp.initialisers import ConstantInit, GlorotUniformInit
from mlp.learning_rules import GradientDescentLearningRule, Adam, RMSProp
from mlp.optimisers import Optimiser

# setup hyperparameters
learning_rate = 0.01
num_epochs = 100
stats_interval = 1
input_dim, output_dim = 784, 47

weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)

# 1
convmodel = MultipleLayerModel([
    ReshapeLayer((1, 28, 28)),                # flat 784 vector -> 1 x 28 x 28 image
    ConvolutionalLayer(1, 5, 28, 28, 5, 5),   # 5 feature maps, 5x5 kernels -> 5 x 24 x 24
    ReluLayer(),
    MaxPoolingLayer(2),                       # 2x2 pooling -> 5 x 12 x 12
    ReshapeLayer(),                           # flatten back to 5 * 12 * 12 = 720
    AffineLayer(12 * 12 * 5, 300, weights_init, biases_init),
    ReluLayer(),
    AffineLayer(300, output_dim, weights_init, biases_init)
])
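# Training sketch for the convolutional model (assumptions, not code from the
# original notebook: the Optimiser constructor takes (model, error,
# learning_rule, train_data, valid_data, data_monitors), train() returns
# (stats, keys, run_time), and EMNIST train_data / valid_data providers are
# created in an earlier cell).
error = CrossEntropySoftmaxError()
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)  # Adam / RMSProp could be swapped in
data_monitors = {'acc': lambda y, t: (y.argmax(-1) == t.argmax(-1)).mean()}

conv_optimiser = Optimiser(convmodel, error, learning_rule,
                           train_data, valid_data, data_monitors)
conv_stats, conv_keys, conv_run_time = conv_optimiser.train(
    num_epochs=num_epochs, stats_interval=stats_interval)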
# In[3]:

from mlp.layers import (AffineLayer, SoftmaxLayer, SigmoidLayer, ReluLayer,
                        LeakyReluLayer, ELULayer, SELULayer)
from mlp.errors import CrossEntropySoftmaxError
from mlp.models import MultipleLayerModel
from mlp.initialisers import ConstantInit, GlorotUniformInit, SELUInit
from mlp.learning_rules import GradientDescentLearningRule
from mlp.optimisers import Optimiser

# setup hyperparameters
learning_rate = 0.1
num_epochs = 100
stats_interval = 1
input_dim, output_dim, hidden_dim = 784, 10, 100

weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)

model_SigmoidLayer = MultipleLayerModel([
    AffineLayer(input_dim, hidden_dim, weights_init, biases_init),
    SigmoidLayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
    SigmoidLayer(),
    AffineLayer(hidden_dim, output_dim, weights_init, biases_init)
])

model_ReluLayer = MultipleLayerModel([
    AffineLayer(input_dim, hidden_dim, weights_init, biases_init),
    ReluLayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
    ReluLayer(),
learning_rate = 0.01
mom_coeff = 0.9
weights_init_gain = 0.5
biases_init = 0.

# Set up a logger object to print info about the training run to stdout
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.handlers = [logging.StreamHandler()]

# Create data provider objects for the MNIST data set
train_data = MNISTDataProvider('train', batch_size, rng=rng)
valid_data = MNISTDataProvider('valid', batch_size, rng=rng)

input_dim, output_dim, hidden_dim = 784, 10, 100

weights_init = GlorotUniformInit(weights_init_gain, rng)
biases_init = ConstantInit(biases_init)
error = CrossEntropySoftmaxError()
learning_rule = MomentumLearningRule(learning_rate, mom_coeff)
data_monitors = {'acc': lambda y, t: (y.argmax(-1) == t.argmax(-1)).mean()}

# L1(1e-3, 1e-4, 1e-5, 1e-6)

# In[4]:

weights_penalties = [
    None,
    L1Penalty(1e-3),
    L1Penalty(1e-4),
    L1Penalty(1e-5),
    L1Penalty(1e-6)
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.handlers = [logging.StreamHandler()]

# Create data provider objects for the MNIST data set
train_data = MNISTDataProvider('train', batch_size=batch_size, rng=rng)
valid_data = MNISTDataProvider('valid', batch_size=batch_size, rng=rng)

# setup hyperparameters
learning_rate = 0.1
num_epochs = 100
stats_interval = 1
input_dim, output_dim, hidden_dim = 784, 10, 100

# setup initialisations
relu_init = GlorotUniformInit(gain=0.5, rng=rng)
biases_init = ConstantInit(0.)

# selufanin = MultipleLayerModel([
#     AffineLayer(input_dim, hidden_dim, UniformInit(-np.sqrt(3/input_dim), np.sqrt(3/input_dim)), biases_init),
#     SELULayer(),
#     AffineLayer(hidden_dim, hidden_dim, UniformInit(-np.sqrt(3/hidden_dim), np.sqrt(3/hidden_dim)), biases_init),
#     SELULayer(),
#     AffineLayer(hidden_dim, output_dim, UniformInit(-np.sqrt(3/hidden_dim), np.sqrt(3/hidden_dim)), biases_init)
# ])

# selufanout = MultipleLayerModel([
#     AffineLayer(input_dim, hidden_dim, UniformInit(-np.sqrt(3/hidden_dim), np.sqrt(3/hidden_dim)), biases_init),
#     SELULayer(),
#     AffineLayer(hidden_dim, hidden_dim, UniformInit(-np.sqrt(3/hidden_dim), np.sqrt(3/hidden_dim)), biases_init),
#     SELULayer(),