def test_set_get_weights_Softmax():
    """
    Tests setting and getting weights for Softmax layer.
    """
    num_classes = 2
    dim = 3
    conv_dim = [3, 4, 5]

    # VectorSpace input space
    layer = Softmax(num_classes, 's', irange=.1)
    softmax_mlp = MLP(layers=[layer], input_space=VectorSpace(dim=dim))
    vec_weights = np.random.randn(dim, num_classes).astype(config.floatX)
    layer.set_weights(vec_weights)
    assert np.allclose(layer.W.get_value(), vec_weights)
    layer.W.set_value(vec_weights)
    assert np.allclose(layer.get_weights(), vec_weights)

    # Conv2DSpace input space
    layer = Softmax(num_classes, 's', irange=.1)
    softmax_mlp = MLP(layers=[layer],
                      input_space=Conv2DSpace(shape=(conv_dim[0],
                                                     conv_dim[1]),
                                              num_channels=conv_dim[2]))
    conv_weights = np.random.randn(conv_dim[0], conv_dim[1], conv_dim[2],
                                   num_classes).astype(config.floatX)
    layer.set_weights(conv_weights.reshape(np.prod(conv_dim), num_classes))
    assert np.allclose(layer.W.get_value(),
                       conv_weights.reshape(np.prod(conv_dim), num_classes))
    layer.W.set_value(conv_weights.reshape(np.prod(conv_dim), num_classes))
    assert np.allclose(layer.get_weights_topo(),
                       np.transpose(conv_weights, axes=(3, 0, 1, 2)))
def test_softmax_binary_targets():
    """
    Constructs softmax layers with binary target and with vector targets
    to check that they give the same cost.
    """
    num_classes = 10
    batch_size = 20
    mlp_bin = MLP(
        layers=[Softmax(num_classes, 's1', irange=0.1, binary_target_dim=1)],
        nvis=100)
    mlp_vec = MLP(layers=[Softmax(num_classes, 's1', irange=0.1)], nvis=100)

    X = mlp_bin.get_input_space().make_theano_batch()
    y_bin = mlp_bin.get_target_space().make_theano_batch()
    y_vec = mlp_vec.get_target_space().make_theano_batch()
    y_hat_bin = mlp_bin.fprop(X)
    y_hat_vec = mlp_vec.fprop(X)

    cost_bin = theano.function([X, y_bin], mlp_bin.cost(y_bin, y_hat_bin),
                               allow_input_downcast=True)
    cost_vec = theano.function([X, y_vec], mlp_vec.cost(y_vec, y_hat_vec),
                               allow_input_downcast=True)

    X_data = np.random.random(size=(batch_size, 100))
    y_bin_data = np.random.randint(low=0, high=10, size=(batch_size, 1))
    y_vec_data = np.zeros((batch_size, num_classes))
    y_vec_data[np.arange(batch_size), y_bin_data.flatten()] = 1
    np.testing.assert_allclose(cost_bin(X_data, y_bin_data),
                               cost_vec(X_data, y_vec_data))
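# The equivalence exercised by test_softmax_binary_targets comes down to
# one-hot cross-entropy selecting the log-probability of the labeled class.
# A minimal numpy sketch of that identity, independent of pylearn2 (all
# names below are illustrative, not part of the library):
import numpy as np

def nll_one_hot(log_p, y_one_hot):
    # mean negative log-likelihood with one-hot targets
    return -np.mean(np.sum(y_one_hot * log_p, axis=1))

def nll_index(log_p, y_index):
    # same quantity with integer targets: index the labeled column directly
    return -np.mean(log_p[np.arange(len(y_index)), y_index])

log_p = np.log(np.full((4, 3), 1.0 / 3))  # uniform class probabilities
y_idx = np.array([0, 2, 1, 2])
y_hot = np.eye(3)[y_idx]
assert np.allclose(nll_one_hot(log_p, y_hot), nll_index(log_p, y_idx))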
def test_softmax_weight_init():
    """
    Constructs softmax layers with different weight initialization
    parameters.
    """
    nvis = 5
    num_classes = 10
    MLP(layers=[Softmax(num_classes, 's', irange=0.1)], nvis=nvis)
    MLP(layers=[Softmax(num_classes, 's', istdev=0.1)], nvis=nvis)
    MLP(layers=[Softmax(num_classes, 's', sparse_init=2)], nvis=nvis)
def test_execution_order():
    # ensure save is called directly after monitoring by checking
    # parameter values in `on_monitor` and `on_save`.
    model = MLP(layers=[Softmax(layer_name='y', n_classes=2, irange=0.)],
                nvis=3)
    dataset = DenseDesignMatrix(X=np.random.normal(size=(6, 3)),
                                y=np.random.normal(size=(6, 2)))
    epoch_counter = EpochCounter(max_epochs=1)
    algorithm = SGD(batch_size=2, learning_rate=0.1,
                    termination_criterion=epoch_counter)
    extension = ParamMonitor()
    train = Train(dataset=dataset, model=model, algorithm=algorithm,
                  extensions=[extension], save_freq=1, save_path="save.pkl")
    # mock save
    train.save = MethodType(only_run_extensions, train)
    train.main_loop()
def get_mlp_softmax(structure):
    n_input, n_output = structure
    # layer = Softmax(n_classes=n_output, irange=0.02, layer_name='y')
    # note: nvis is hard-coded to 500; n_input from `structure` is unused here
    layer = MLP(layers=[Softmax(n_classes=n_output, irange=0.02,
                                layer_name='y')],
                nvis=500)
    return layer
def loadModel2(pklname):
    ishape = Conv2DSpace(shape=[48, 48], num_channels=1)
    nclass = 7

    # create layers
    nk = [30, 40]  # train3040.pkl.cpu
    #nk = [32, 20, 10]
    #nk = [40, 30, 20]
    ks = [[8, 8], [5, 5], [3, 3]]
    ir = [0.05, 0.05, 0.05]
    ps = [[4, 4], [4, 4], [2, 2]]
    pd = [[2, 2], [2, 2], [2, 2]]
    kn = [0.9, 0.9, 0.9]
    layers = DBL_ConvLayers(nk, ks, ir, ps, pd, kn)
    layer_soft = Softmax(
        layer_name='y',
        #max_col_norm=1.9365,
        n_classes=nclass,
        #init_bias_target_marginals=DBL.ds_train,
        #istdev=.05,
        irange=.0)
    #layers.append(layer_soft)

    # create DBL_model
    model = MLP(layers, input_space=ishape)

    layer_params = cPickle.load(open(pklname, 'rb'))
    layer_id = 0
    for layer in model.layers:
        if layer_id < len(layers) - 1:
            layer.set_weights(layer_params[layer_id][0])
            layer.set_biases(layer_params[layer_id][1])
        layer_id = layer_id + 1
    return model
def test_exhaustive_dropout_average():
    # This is only a smoke test: verifies that it compiles and runs,
    # not any particular value.
    inp = theano.tensor.matrix()
    mlp = MLP(nvis=2, layers=[Linear(2, 'h0', irange=0.8),
                              Linear(2, 'h1', irange=0.8),
                              Softmax(3, 'out', irange=0.8)])

    out = exhaustive_dropout_average(mlp, inp)
    f = theano.function([inp], out, allow_input_downcast=True)
    f([[2.3, 4.9]])

    out = exhaustive_dropout_average(mlp, inp, input_scales={'h0': 3})
    f = theano.function([inp], out, allow_input_downcast=True)
    f([[2.3, 4.9]])

    out = exhaustive_dropout_average(mlp, inp, masked_input_layers=['h1'])
    f = theano.function([inp], out, allow_input_downcast=True)
    f([[2.3, 4.9]])

    np.testing.assert_raises(ValueError, exhaustive_dropout_average,
                             mlp, inp, ['h5'])
    np.testing.assert_raises(ValueError, exhaustive_dropout_average,
                             mlp, inp, ['h0'], 2., {'h5': 3.})
def construct_model(inputs_shape, filters, bias, kernel_stride, pool_type,
                    pool_shape, pool_stride, conv_class):
    conv_3d_input_space = Conv3DSpace(inputs_shape[1:4],
                                      num_channels=inputs_shape[4],
                                      axes=('b', 0, 1, 2, 'c'))
    conv_3d_layer = Conv3dElemwise(output_channels=filters.shape[0],
                                   kernel_shape=filters.shape[1:4],
                                   kernel_stride=kernel_stride,
                                   layer_name='conv3d_lin',
                                   nonlinearity=IdentityConvNonlinearity(),
                                   conv_transformer_class=conv_class,
                                   pool_transformer_class=CudnnPoolTransformer,
                                   irange=0.001,
                                   pool_type=pool_type,
                                   pool_shape=pool_shape,
                                   pool_stride=pool_stride)
    softmax_layer = Softmax(max_col_norm=2, layer_name='y', n_classes=2,
                            istdev=.05)
    mlp = MLP(input_space=conv_3d_input_space,
              layers=[conv_3d_layer, softmax_layer])
    # convert filters to correct axes (('b', 0, 1, 2, 'c') are the test
    # data axes)
    converted_filters = Conv3DSpace.convert_numpy(
        filters, ('b', 0, 1, 2, 'c'), conv_3d_layer.detector_space.axes)
    conv_3d_layer.set_weights(converted_filters)
    conv_3d_layer.set_biases(bias)
    return mlp
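# For reference, the Conv3DSpace.convert_numpy call above amounts to an axis
# permutation. A standalone sketch of the same reordering with a plain numpy
# transpose (shapes chosen arbitrarily for illustration):
import numpy as np

x = np.zeros((6, 5, 4, 3, 2))    # axes ('b', 0, 1, 2, 'c')
y = x.transpose(4, 1, 2, 3, 0)   # axes ('c', 0, 1, 2, 'b')
assert y.shape == (2, 5, 4, 3, 6)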
def test_multiple_inputs():
    """
    Create a VectorSpacesDataset with two inputs (features0 and features1)
    and train an MLP which takes both inputs for 1 epoch.
    """
    mlp = MLP(layers=[FlattenerLayer(CompositeLayer('composite',
                                                    [Linear(10, 'h0', 0.1),
                                                     Linear(10, 'h1', 0.1)],
                                                    {0: [1], 1: [0]})),
                      Softmax(5, 'softmax', 0.1)],
              input_space=CompositeSpace([VectorSpace(15), VectorSpace(20)]),
              input_source=('features0', 'features1'))
    dataset = VectorSpacesDataset(
        (np.random.rand(20, 20).astype(theano.config.floatX),
         np.random.rand(20, 15).astype(theano.config.floatX),
         np.random.rand(20, 5).astype(theano.config.floatX)),
        (CompositeSpace([VectorSpace(20), VectorSpace(15), VectorSpace(5)]),
         ('features1', 'features0', 'targets')))
    train = Train(dataset, mlp, SGD(0.1, batch_size=5))
    train.algorithm.termination_criterion = EpochCounter(1)
    train.main_loop()
def test_get_layer_monitor_channels():
    """
    Create a MLP with multiple layer types and get layer monitoring
    channels for MLP.
    """
    mlp = MLP(layers=[FlattenerLayer(CompositeLayer('composite',
                                                    [Linear(10, 'h0', 0.1),
                                                     Linear(10, 'h1', 0.1)],
                                                    {0: [1], 1: [0]})),
                      Softmax(5, 'softmax', 0.1)],
              input_space=CompositeSpace([VectorSpace(15), VectorSpace(20)]),
              input_source=('features0', 'features1'))
    dataset = VectorSpacesDataset(
        (np.random.rand(20, 20).astype(theano.config.floatX),
         np.random.rand(20, 15).astype(theano.config.floatX),
         np.random.rand(20, 5).astype(theano.config.floatX)),
        (CompositeSpace([VectorSpace(20), VectorSpace(15), VectorSpace(5)]),
         ('features1', 'features0', 'targets')))
    state_below = mlp.get_input_space().make_theano_batch()
    targets = mlp.get_target_space().make_theano_batch()
    mlp.get_layer_monitoring_channels(state_below=state_below,
                                      state=None, targets=targets)
def model1():
    # train set X has dim (60,000, 784), y has dim (60,000, 10)
    train_set = MNIST(which_set='train', one_hot=True)
    # test set X has dim (10,000, 784), y has dim (10,000, 10)
    valid_set = MNIST(which_set='test', one_hot=True)
    test_set = MNIST(which_set='test', one_hot=True)

    # =====<Create the MLP Model>=====
    h2_layer = NoisyRELU(layer_name='h1', sparse_init=15, noise_factor=5,
                         dim=1000, desired_active_rate=0.2, bias_factor=20,
                         max_col_norm=1)
    #h2_layer = RectifiedLinear(layer_name='h2', dim=100, sparse_init=15,
    #                           max_col_norm=1)
    y_layer = Softmax(layer_name='y', n_classes=10, irange=0., max_col_norm=1)
    mlp = MLP(batch_size=200,
              input_space=VectorSpace(dim=train_set.X.shape[1]),
              layers=[h2_layer, y_layer])

    # =====<Create the SGD algorithm>=====
    sgd = SGD(init_momentum=0.1,
              learning_rate=0.01,
              monitoring_dataset={'valid': valid_set},
              cost=MethodCost('cost_from_X'),
              termination_criterion=MonitorBased(
                  channel_name='valid_y_misclass',
                  prop_decrease=0.001, N=50))

    # =====<Extensions>=====
    ext = [MomentumAdjustor(start=1, saturate=10, final_momentum=0.9)]

    # =====<Create the Training Object and run the training>=====
    save_path = './mlp_model1.pkl'
    train_obj = Train(dataset=train_set, model=mlp, algorithm=sgd,
                      extensions=ext, save_path=save_path, save_freq=0)
    train_obj.main_loop()
def test_softmax_generality():
    """Tests that the Softmax layer can score outputs it did not create."""
    nvis = 1
    num_classes = 2
    model = MLP(layers=[Softmax(num_classes, 's', irange=0.1)], nvis=nvis)
    Z = T.matrix()
    Y_hat = T.nnet.softmax(Z)
    Y = T.matrix()
    model.layers[-1].cost(Y=Y, Y_hat=Y_hat)
def test_flattener_layer_state_separation_for_softmax():
    """
    Creates a CompositeLayer wrapping two Softmax layers and ensures that
    state gets correctly picked apart.
    """
    soft1 = Softmax(5, 'sf1', .1)
    soft2 = Softmax(5, 'sf2', .1)
    mlp = MLP(layers=[FlattenerLayer(CompositeLayer('comp',
                                                    [soft1, soft2]))],
              nvis=2)
    X = np.random.rand(20, 2).astype(theano.config.floatX)
    y = np.random.rand(20, 10).astype(theano.config.floatX)
    dataset = DenseDesignMatrix(X=X, y=y)
    train = Train(dataset, mlp,
                  SGD(0.1, batch_size=5, monitoring_dataset=dataset))
    train.algorithm.termination_criterion = EpochCounter(1)
    train.main_loop()
def test_sampled_dropout_average():
    # This is only a smoke test: verifies that it compiles and runs,
    # not any particular value.
    inp = theano.tensor.matrix()
    mlp = MLP(nvis=2, layers=[Linear(2, 'h0', irange=0.8),
                              Linear(2, 'h1', irange=0.8),
                              Softmax(3, 'out', irange=0.8)])
    out = sampled_dropout_average(mlp, inp, 5)
    f = theano.function([inp], out, allow_input_downcast=True)
    f([[2.3, 4.9]])
def model2():
    # train set X has dim (60,000, 784), y has dim (60,000, 10)
    train_set = MNIST(which_set='train', one_hot=True)
    # test set X has dim (10,000, 784), y has dim (10,000, 10)
    test_set = MNIST(which_set='test', one_hot=True)

    # =====<Create the MLP Model>=====
    h1_layer = RectifiedLinear(layer_name='h1', dim=1000, irange=0.5)
    h2_layer = RectifiedLinear(layer_name='h2', dim=1000, sparse_init=15,
                               max_col_norm=1)
    y_layer = Softmax(layer_name='y', n_classes=train_set.y.shape[1],
                      irange=0.5)
    mlp = MLP(batch_size=100,
              input_space=VectorSpace(dim=train_set.X.shape[1]),
              layers=[h1_layer, h2_layer, y_layer])

    # =====<Create the SGD algorithm>=====
    sgd = SGD(batch_size=100,
              init_momentum=0.1,
              learning_rate=0.01,
              monitoring_dataset={'valid': train_set, 'test': test_set},
              cost=SumOfCosts(costs=[
                  MethodCost('cost_from_X'),
                  WeightDecay(coeffs=[0.00005, 0.00005, 0.00005])]),
              termination_criterion=MonitorBased(
                  channel_name='valid_y_misclass',
                  prop_decrease=0.0001, N=5))

    # =====<Extensions>=====
    ext = [MomentumAdjustor(start=1, saturate=10, final_momentum=0.99)]

    # =====<Create the Training Object and run the training>=====
    save_path = './mlp_model2.pkl'
    train_obj = Train(dataset=train_set, model=mlp, algorithm=sgd,
                      extensions=ext, save_path=save_path, save_freq=0)
    train_obj.main_loop()
def test_softmax_bin_targets_channels(seed=0):
    """
    Constructs softmax layers with binary target and with vector targets
    to check that they give the same 'misclass' channel value.
    """
    np.random.seed(seed)
    num_classes = 2
    batch_size = 5
    mlp_bin = MLP(
        layers=[Softmax(num_classes, 's1', irange=0.1, binary_target_dim=1)],
        nvis=100)
    mlp_vec = MLP(layers=[Softmax(num_classes, 's1', irange=0.1)], nvis=100)

    X = mlp_bin.get_input_space().make_theano_batch()
    y_bin = mlp_bin.get_target_space().make_theano_batch()
    y_vec = mlp_vec.get_target_space().make_theano_batch()

    X_data = np.random.random(size=(batch_size, 100))
    X_data = X_data.astype(theano.config.floatX)
    y_bin_data = np.random.randint(low=0, high=num_classes,
                                   size=(batch_size, 1))
    y_vec_data = np.zeros((batch_size, num_classes),
                          dtype=theano.config.floatX)
    y_vec_data[np.arange(batch_size), y_bin_data.flatten()] = 1

    def channel_value(channel_name, model, y, y_data):
        chans = model.get_monitoring_channels((X, y))
        f_channel = theano.function([X, y], chans['s1_' + channel_name])
        return f_channel(X_data, y_data)

    for channel_name in ['misclass', 'nll']:
        vec_val = channel_value(channel_name, mlp_vec, y_vec, y_vec_data)
        bin_val = channel_value(channel_name, mlp_bin, y_bin, y_bin_data)
        print(channel_name, vec_val, bin_val)
        np.testing.assert_allclose(vec_val, bin_val)
def model3():
    # SVHN train and test sets, loaded fully into memory
    train_set = SVHN_On_Memory(which_set='train')
    test_set = SVHN_On_Memory(which_set='test')

    # =====<Create the MLP Model>=====
    h1_layer = NoisyRELU(layer_name='h1', dim=2000, threshold=5,
                         sparse_init=15, max_col_norm=1)
    y_layer = Softmax(layer_name='y', n_classes=train_set.y.shape[1],
                      irange=0.5)
    mlp = MLP(batch_size=64,
              input_space=VectorSpace(dim=train_set.X.shape[1]),
              layers=[h1_layer, y_layer])

    # =====<Create the SGD algorithm>=====
    sgd = SGD(batch_size=64,
              init_momentum=0.1,
              learning_rate=0.01,
              monitoring_dataset={'valid': train_set, 'test': test_set},
              cost=MethodCost('cost_from_X'),
              termination_criterion=MonitorBased(
                  channel_name='valid_y_misclass',
                  prop_decrease=0.001, N=50))

    # =====<Extensions>=====
    ext = [MomentumAdjustor(start=1, saturate=10, final_momentum=0.9)]

    # =====<Create the Training Object and run the training>=====
    save_path = './mlp_model.pkl'
    train_obj = Train(dataset=train_set, model=mlp, algorithm=sgd,
                      extensions=ext, save_path=save_path, save_freq=10)
    train_obj.main_loop()
def test_exhaustive_dropout_average():
    # This is only a smoke test: verifies that it compiles and runs,
    # not any particular value.
    inp = theano.tensor.matrix()
    mlp = MLP(nvis=2, layers=[Linear(2, 'h0', irange=0.8),
                              Linear(2, 'h1', irange=0.8),
                              Softmax(3, 'out', irange=0.8)])
    out = exhaustive_dropout_average(mlp, inp)
    f = theano.function([inp], out)
    f([[2.3, 4.9]])
def get_conv2D(dim_input, batch_size=200):
    config = {
        'batch_size': batch_size,
        'input_space': Conv2DSpace(shape=dim_input[:2],
                                   num_channels=dim_input[2]),
        'layers': [
            ConvRectifiedLinear(layer_name='h0',
                                output_channels=20,
                                irange=.04,
                                init_bias=0.,
                                max_kernel_norm=1.9365,
                                kernel_shape=[5, 5],
                                border_mode='full',
                                pool_shape=[8, 4],
                                pool_stride=[3, 2],
                                W_lr_scale=0.64),
            ConvRectifiedLinear(layer_name='h1',
                                output_channels=40,
                                irange=.04,
                                init_bias=0.,
                                max_kernel_norm=1.9365,
                                kernel_shape=[3, 3],
                                border_mode='valid',
                                pool_shape=[4, 4],
                                pool_stride=[2, 2],
                                W_lr_scale=0.64),
            ConvRectifiedLinear(layer_name='h2',
                                output_channels=60,
                                irange=.04,
                                init_bias=0.,
                                max_kernel_norm=1.9365,
                                kernel_shape=[3, 3],
                                border_mode='valid',
                                pool_shape=[2, 2],
                                pool_stride=[1, 1],
                                W_lr_scale=0.64),
            ConvRectifiedLinear(layer_name='h3',
                                output_channels=80,
                                irange=.04,
                                init_bias=0.,
                                max_kernel_norm=1.9365,
                                kernel_shape=[3, 3],
                                pool_shape=[2, 2],
                                pool_stride=[2, 2],
                                W_lr_scale=0.64),
            Softmax(layer_name='y',
                    n_classes=2,
                    istdev=.025,
                    W_lr_scale=0.25)
        ]
    }
    return MLP(**config)
def test_softmax_two_binary_targets():
    """
    Constructs softmax layers with two binary targets and with vector
    targets to check that they give the same cost.
    """
    num_classes = 10
    batch_size = 20
    mlp_bin = MLP(
        layers=[Softmax(num_classes, 's1', irange=0.1, binary_target_dim=2)],
        nvis=100)
    mlp_vec = MLP(layers=[Softmax(num_classes, 's1', irange=0.1)], nvis=100)

    X = mlp_bin.get_input_space().make_theano_batch()
    y_bin = mlp_bin.get_target_space().make_theano_batch()
    y_vec = mlp_vec.get_target_space().make_theano_batch()
    y_hat_bin = mlp_bin.fprop(X)
    y_hat_vec = mlp_vec.fprop(X)

    cost_bin = theano.function([X, y_bin], mlp_bin.cost(y_bin, y_hat_bin),
                               allow_input_downcast=True)
    cost_vec = theano.function([X, y_vec], mlp_vec.cost(y_vec, y_hat_vec),
                               allow_input_downcast=True)

    X_data = np.random.random(size=(batch_size, 100))
    # binary and vector costs can only match
    # if binary targets are mutually exclusive
    y_bin_data = np.concatenate([np.random.permutation(10)[:2].reshape((1, 2))
                                 for _ in range(batch_size)])
    y_vec_data = np.zeros((batch_size, num_classes))
    y_vec_data[np.arange(batch_size), y_bin_data[:, 0].flatten()] = 1
    y_vec_data[np.arange(batch_size), y_bin_data[:, 1].flatten()] = 1
    np.testing.assert_allclose(cost_bin(X_data, y_bin_data),
                               cost_vec(X_data, y_vec_data))
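# The mutual-exclusivity caveat in the comment above can be seen directly:
# with two integer targets per row, the binary-target cost sums the two
# labeled log-probabilities, which a 0/1 target vector reproduces only when
# the two labels differ. An illustrative numpy check, independent of pylearn2:
import numpy as np

log_p = np.log(np.array([[0.2, 0.3, 0.5]]))

# two distinct integer targets: cost sums both labeled log-probabilities
idx_cost = -(log_p[0, 0] + log_p[0, 2])
vec_cost = -np.sum(np.array([[1.0, 0.0, 1.0]]) * log_p)
assert np.isclose(idx_cost, vec_cost)

# duplicated targets count one class twice, which no 0/1 vector can express
dup_cost = -(log_p[0, 0] + log_p[0, 0])
assert not np.isclose(dup_cost, -np.sum(np.array([[1.0, 0.0, 0.0]]) * log_p))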
def get_conv2(dim_input):
    # `bsize` and `nclass` are assumed to be module-level globals here
    config = {
        'batch_size': bsize,
        'input_space': Conv2DSpace(shape=dim_input[:2],
                                   num_channels=dim_input[2],
                                   axes=['c', 0, 1, 'b']),
        'layers': [
            ConvRectifiedLinear(layer_name='h0',
                                output_channels=20,
                                irange=.005,
                                max_kernel_norm=.9,
                                kernel_shape=[7, 7],
                                pool_shape=[4, 4],
                                pool_stride=[2, 2],
                                W_lr_scale=.1,
                                b_lr_scale=.1),
            ConvRectifiedLinear(layer_name='h1',
                                output_channels=40,
                                irange=.005,
                                max_kernel_norm=0.9,
                                kernel_shape=[7, 7],
                                pool_shape=[4, 4],
                                pool_stride=[2, 2],
                                W_lr_scale=.1,
                                b_lr_scale=.1),
            ConvRectifiedLinear(layer_name='h2',
                                output_channels=80,
                                irange=.005,
                                max_kernel_norm=0.9,
                                kernel_shape=[5, 5],
                                pool_shape=[2, 2],
                                pool_stride=[2, 2],
                                W_lr_scale=.1,
                                b_lr_scale=.1),
            RectifiedLinear(layer_name='h3',
                            irange=.005,
                            dim=500,
                            max_col_norm=1.9),
            Softmax(layer_name='y',
                    n_classes=nclass,
                    irange=.005,
                    max_col_norm=1.9)
        ]
    }
    return MLP(**config)
def construct_layers(model_params, dataset, out_nonlin):
    def add_maxout_conv_layer(params):
        params['kernel_shape'] = [params['kernel_shape'],
                                  params['kernel_shape']]
        params['pool_shape'] = [params['pool_shape'], params['pool_shape']]
        params['pool_stride'] = [params['pool_stride'], params['pool_stride']]
        params['num_channels'] = params['num_units']
        del params['num_units']
        return _MaxoutConvC01B(**params)

    def add_maxout_layer(params):
        return _Maxout(**params)

    def add_sigmoid_layer(params):
        return _Sigmoid(**params)

    def add_rectified_linear(params):
        return _RectifiedLinear(**params)

    layers = []
    constructor = {'maxout': add_maxout_layer,
                   'maxout_convolution': add_maxout_conv_layer,
                   'sigmoid': add_sigmoid_layer,
                   'rectified_linear': add_rectified_linear}
    for params in model_params['layers']:
        layers.append(make(constructor[params['type']], params))

    if out_nonlin == 'LINEARGAUSSIAN':
        layer = LinearGaussian(init_beta=beta_from_targets(dataset),
                               min_beta=1.,
                               max_beta=100.,
                               beta_lr_scale=1.,
                               layer_name='y',
                               irange=.005,
                               dim=dataset.y.shape[1],
                               init_bias=mean_of_targets(dataset))
    else:
        layer = Softmax(max_col_norm=1.9365,
                        layer_name='y',
                        n_classes=dataset.y.shape[1],
                        irange=.005)
    layers.append(layer)
    return layers
def get_maxout(dim_input, batch_size=100):
    config = {
        'batch_size': batch_size,
        'input_space': Conv2DSpace(shape=dim_input[:2],
                                   num_channels=dim_input[2],
                                   axes=['c', 0, 1, 'b']),
        'layers': [
            MaxoutConvC01B(layer_name='h0',
                           pad=0,
                           num_channels=72,
                           num_pieces=2,
                           kernel_shape=[8, 8],
                           pool_shape=[4, 4],
                           pool_stride=[2, 2],
                           irange=.005,
                           max_kernel_norm=.9),
            MaxoutConvC01B(layer_name='h1',
                           pad=3,
                           num_channels=72,
                           num_pieces=2,
                           kernel_shape=[8, 8],
                           pool_shape=[4, 4],
                           pool_stride=[2, 2],
                           irange=.005,
                           max_kernel_norm=1.9365),
            MaxoutConvC01B(layer_name='h2',
                           pad=3,
                           num_channels=48,
                           num_pieces=4,
                           kernel_shape=[5, 5],
                           pool_shape=[2, 2],
                           pool_stride=[2, 2],
                           irange=.005,
                           max_kernel_norm=1.9365),
            Softmax(layer_name='y',
                    max_col_norm=1.9365,
                    n_classes=10,
                    irange=0.005)
        ]
    }
    return MLP(**config)
def get_conv1D(dim_input):
    config = {
        'batch_size': 200,
        'input_space': Conv2DSpace(shape=dim_input[:2],
                                   num_channels=dim_input[2]),
        'dropout_include_probs': [1, 1, 1, 0.5, 1],
        'dropout_input_include_prob': 0.8,
        'layers': [
            ConvRectifiedLinear(layer_name='h0',
                                output_channels=40,
                                irange=.04,
                                init_bias=0.5,
                                max_kernel_norm=1.9365,
                                kernel_shape=[7, 1],
                                pool_shape=[4, 1],
                                pool_stride=[3, 1],
                                W_lr_scale=0.64),
            ConvRectifiedLinear(layer_name='h1',
                                output_channels=30,
                                irange=.05,
                                init_bias=0.,
                                max_kernel_norm=1.9365,
                                kernel_shape=[5, 1],
                                pool_shape=[4, 1],
                                pool_stride=[1, 1],
                                W_lr_scale=1.),
            ConvRectifiedLinear(layer_name='h2',
                                output_channels=20,
                                irange=.05,
                                init_bias=0.,
                                max_kernel_norm=1.9365,
                                kernel_shape=[5, 1],
                                pool_shape=[4, 1],
                                pool_stride=[1, 1],
                                W_lr_scale=1.),
            ConvRectifiedLinear(layer_name='h3',
                                output_channels=10,
                                irange=.05,
                                init_bias=0.,
                                max_kernel_norm=1.9365,
                                kernel_shape=[3, 1],
                                pool_shape=[4, 1],
                                pool_stride=[2, 1],
                                W_lr_scale=1.),
            Softmax(layer_name='y',
                    n_classes=2,
                    irange=.025,
                    W_lr_scale=0.25)
        ]
    }
    return MLP(**config)
def construct_dbn_from_stack(stack):
    # some settings
    irange = 0.05

    layers = []
    for ii, layer in enumerate(stack.layers()):
        layers.append(Sigmoid(dim=layer.nhid,
                              layer_name='h' + str(ii),
                              irange=irange,
                              max_col_norm=2.))
    nc = 159 if SUBMODEL == 1 else 8
    # softmax layer at the end for classification
    layers.append(Softmax(n_classes=nc, layer_name='y', irange=irange))
    dbn = MLP(layers=layers, nvis=stack.layers()[0].get_input_space().dim)

    # copy weights to DBN
    for ii, layer in enumerate(stack.layers()):
        dbn.layers[ii].set_weights(layer.get_weights())
        dbn.layers[ii].set_biases(layer.hidbias.get_value(borrow=False))
    return dbn
def produce_train_obj(new_epochs, model=None):
    if model is None:
        model = MLP(layers=[Softmax(layer_name='y', n_classes=2, irange=0.)],
                    nvis=3)
    else:
        model = push_monitor(model, 'old_monitor', transfer_experience=True)
    dataset = DenseDesignMatrix(X=np.random.normal(size=(6, 3)),
                                y=np.random.normal(size=(6, 2)))
    # `N` is assumed to be a module-level constant giving the epoch budget
    epoch_counter = EpochCounter(max_epochs=N, new_epochs=new_epochs)
    algorithm = SGD(batch_size=2, learning_rate=0.1,
                    termination_criterion=epoch_counter)
    return Train(dataset=dataset, model=model, algorithm=algorithm)
def test_correctness():
    model = MLP(layers=[Linear(dim=10, layer_name='linear', irange=1.0),
                        Softmax(n_classes=2, layer_name='softmax',
                                irange=1.0)],
                batch_size=10,
                nvis=10)
    cost = LpPenalty(variables=model.get_params(), p=2)
    penalty = cost.expr(model, None)
    penalty_function = theano.function(inputs=[], outputs=penalty)
    p = penalty_function()

    actual_p = 0
    for param in model.get_params():
        actual_p += numpy.sum(param.get_value() ** 2)
    assert numpy.allclose(p, actual_p)
def get_layer_softmax(self, layer_id, layer_name):
    row = self.db.executeSQL(
        """
        SELECT n_classes, irange, istdev, sparse_init, W_lr_scale,
               b_lr_scale, max_row_norm, no_affine, max_col_norm
        FROM hps3.layer_softmax
        WHERE layer_id = %s
        """, (layer_id,), self.db.FETCH_ONE)
    if not row:
        raise HPSData("No softmax layer for layer_id=" + str(layer_id))
    (n_classes, irange, istdev, sparse_init, W_lr_scale, b_lr_scale,
     max_row_norm, no_affine, max_col_norm) = row
    return Softmax(n_classes=n_classes,
                   irange=irange,
                   istdev=istdev,
                   sparse_init=sparse_init,
                   W_lr_scale=W_lr_scale,
                   b_lr_scale=b_lr_scale,
                   max_row_norm=max_row_norm,
                   no_affine=no_affine,
                   max_col_norm=max_col_norm,
                   layer_name=layer_name)
def DBL_model_test1(basepath, cutoff=[-1, -1], pklname='', newdata=None):
    # data
    ishape = Conv2DSpace(shape=[48, 48], num_channels=1)
    preproc = [0, 0]
    nclass = 7
    DBL = DBL_model(basepath, nclass, np.append(ishape.shape, 1), preproc,
                    cutoff)

    # create layers
    nk = [30]
    #nk = [40, 30, 20]
    ks = [[8, 8], [5, 5], [3, 3]]
    ir = [0.05, 0.05, 0.05]
    ps = [[4, 4], [4, 4], [2, 2]]
    pd = [[2, 2], [2, 2], [2, 2]]
    kn = [0.9, 0.9, 0.9]
    layers = DBL_ConvLayers(nk, ks, ir, ps, pd, kn)
    layer_soft = Softmax(
        layer_name='y',
        #max_col_norm=1.9365,
        n_classes=nclass,
        init_bias_target_marginals=DBL.ds_train,
        #istdev=.05,
        irange=.0)
    layers.append(layer_soft)

    # create DBL_model
    model = MLP(layers, input_space=ishape)

    if pklname != '' and os.path.isfile(pklname):
        # load and rebuild model
        layer_params = cPickle.load(open(pklname + '.cpu', 'rb'))
        layer_id = 0
        for layer in model.layers:
            if layer_id < len(layers) - 1:
                layer.set_weights(layer_params[layer_id][0])
                layer.set_biases(layer_params[layer_id][1])
            else:
                layer.set_weights(layer_params[layer_id][1])
                layer.set_biases(layer_params[layer_id][0])
            layer_id = layer_id + 1
        DBL.model = model
        DBL.test_raw(newdata)
    else:
        algo_term = EpochCounter(500)  # number of epoch iterations
        algo = SGD(learning_rate=0.001,
                   batch_size=500,
                   init_momentum=.5,
                   monitoring_dataset=DBL.ds_valid,
                   termination_criterion=algo_term)
        DBL.run_model(model, algo)

        # save the model parameters
        if pklname != '':
            layer_params = []
            for layer in layers:
                param = layer.get_params()
                print param
                print param[0].get_value()
                layer_params.append([param[0].get_value(),
                                     param[1].get_value()])
            cPickle.dump(layer_params, open(pklname + '.cpu', 'wb'))

    print DBL.result_valid[1], DBL.result_test[1]
    return DBL.result_valid[1], DBL.result_test[1]
from pylearn2.costs.cost import MethodCost
from pylearn2.datasets.mnist import MNIST
from pylearn2.models.mlp import MLP, Sigmoid, Softmax
from pylearn2.train import Train
from pylearn2.training_algorithms.sgd import SGD
from pylearn2.training_algorithms.learning_rule import (Momentum,
                                                        MomentumAdjustor)
from pylearn2.termination_criteria import EpochCounter

train_set = MNIST(which_set='train', start=0, stop=50000)
valid_set = MNIST(which_set='train', start=50000, stop=60000)
test_set = MNIST(which_set='test')

model = MLP(nvis=784,
            layers=[Sigmoid(layer_name='h', dim=500, irange=0.01),
                    Softmax(layer_name='y', n_classes=10, irange=0.01)])

algorithm = SGD(batch_size=100,
                learning_rate=0.01,
                learning_rule=Momentum(init_momentum=0.5),
                monitoring_dataset={'train': train_set,
                                    'valid': valid_set,
                                    'test': test_set},
                cost=MethodCost('cost_from_X'),
                termination_criterion=EpochCounter(10))

train = Train(dataset=train_set,
              model=model,
              algorithm=algorithm,
              save_path="mnist_example.pkl",
              save_freq=1,
              extensions=[MomentumAdjustor(start=5, saturate=6,
                                           final_momentum=0.95)])
train.main_loop()
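# After main_loop finishes, the checkpoint written to save_path can be
# reloaded for inference. A minimal sketch, assuming the pickle holds the
# trained model (as Train.save does in stock pylearn2); the random batch is
# only a stand-in for real MNIST data:
import numpy as np
import theano
from pylearn2.utils import serial

model = serial.load("mnist_example.pkl")

# compile a prediction function from the model's fprop graph
X = model.get_input_space().make_theano_batch()
predict = theano.function([X], model.fprop(X), allow_input_downcast=True)

probs = predict(np.random.rand(10, 784))  # placeholder batch, 784 = 28 * 28
print(probs.argmax(axis=1))               # predicted digit classes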