def main(save_to, num_epochs): mlp = MLP([Tanh(), Softmax()], [784, 100, 10], weights_init=IsotropicGaussian(0.01), biases_init=Constant(0)) mlp.initialize() x = tensor.matrix('features') y = tensor.lmatrix('targets') probs = mlp.apply(tensor.flatten(x, outdim=2)) cost = CategoricalCrossEntropy().apply(y.flatten(), probs) error_rate = MisclassificationRate().apply(y.flatten(), probs) cg = ComputationGraph([cost]) W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables) cost = cost + .00005 * (W1**2).sum() + .00005 * (W2**2).sum() cost.name = 'final_cost' mnist_train = MNIST(("train", )) mnist_test = MNIST(("test", )) algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Scale(learning_rate=0.1)) extensions = [ Timing(), FinishAfter(after_n_epochs=num_epochs), DataStreamMonitoring([cost, error_rate], Flatten(DataStream.default_stream( mnist_test, iteration_scheme=SequentialScheme( mnist_test.num_examples, 500)), which_sources=('features', )), prefix="test"), TrainingDataMonitoring([ cost, error_rate, aggregation.mean(algorithm.total_gradient_norm) ], prefix="train", after_epoch=True), Checkpoint(save_to), Printing() ] if BLOCKS_EXTRAS_AVAILABLE: extensions.append( Plot('MNIST example', channels=[[ 'test_final_cost', 'test_misclassificationrate_apply_error_rate' ], ['train_total_gradient_norm']])) main_loop = MainLoop(algorithm, Flatten(DataStream.default_stream( mnist_train, iteration_scheme=SequentialScheme( mnist_train.num_examples, 50)), which_sources=('features', )), model=Model(cost), extensions=extensions) main_loop.run()
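# Hypothetical entry point (the filename and epoch count below are assumptions,
# not taken from the original script): run the MNIST training loop defined above.
if __name__ == '__main__':
    main(save_to='mnist.tar', num_epochs=5)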
def setup_model(): # shape: T x B x F input_ = T.tensor3('features') # shape: B target = T.lvector('targets') model = LSTMAttention(input_dim=10000, dim=500, mlp_hidden_dims=[2000, 500, 4], batch_size=100, image_shape=(100, 100), patch_shape=(28, 28), weights_init=IsotropicGaussian(0.01), biases_init=Constant(0)) model.initialize() h, c = model.apply(input_) classifier = MLP([Rectifier(), Softmax()], [500, 100, 10], weights_init=IsotropicGaussian(0.01), biases_init=Constant(0)) classifier.initialize() probabilities = classifier.apply(h[-1]) cost = CategoricalCrossEntropy().apply(target, probabilities) error_rate = MisclassificationRate().apply(target, probabilities) return cost, error_rate
def build_classifier(dimension): mlp = MLP([Tanh(), Tanh(), Softmax()], [784, 100, 50, 10], weights_init=IsotropicGaussian(0.01), biases_init=Constant(0)) mlp.initialize() return mlp
def setup_model(configs): tensor5 = theano.tensor.TensorType(config.floatX, (False,) * 5) # shape: T x B x C x X x Y input_ = tensor5("features") tensor3 = theano.tensor.TensorType(config.floatX, (False,) * 3) locs = tensor3("locs") # shape: B x Classes target = T.ivector("targets") model = LSTMAttention(configs, weights_init=Glorot(), biases_init=Constant(0)) model.initialize() (h, c, location, scale, alpha, patch, downn_sampled_input, conved_part_1, conved_part_2, pre_lstm) = model.apply( input_, locs ) model.location = location model.scale = scale model.alpha = alpha model.patch = patch classifier = MLP( [Rectifier(), Softmax()], configs["classifier_dims"], weights_init=Glorot(), biases_init=Constant(0) ) classifier.initialize() probabilities = classifier.apply(h[-1]) cost = CategoricalCrossEntropy().apply(target, probabilities) cost.name = "CE" error_rate = MisclassificationRate().apply(target, probabilities) error_rate.name = "ER" model.cost = cost model.error_rate = error_rate model.probabilities = probabilities if configs["load_pretrained"]: blocks_model = Model(model.cost) all_params = blocks_model.parameters with open("VGG_CNN_params.npz") as f: loaded = np.load(f) all_conv_params = loaded.keys() for param in all_params: if param.name in loaded.keys(): assert param.get_value().shape == loaded[param.name].shape param.set_value(loaded[param.name]) all_conv_params.pop(all_conv_params.index(param.name)) print "the following parameters did not match: " + str(all_conv_params) if configs["test_model"]: print "TESTING THE MODEL: CHECK THE INPUT SIZE!" cg = ComputationGraph(model.cost) f = theano.function(cg.inputs, [model.cost], on_unused_input="ignore", allow_input_downcast=True) data = configs["get_streams"](configs["batch_size"])[0].get_epoch_iterator().next() f(data[1], data[0], data[2]) print "Test passed! ;)" model.monitorings = [cost, error_rate] return model
def build_model_mnist(): # CNN filter_size = (5, 5) activation = Rectifier().apply pooling_size = (2, 2) num_filters = 50 layer0 = ConvolutionalLayer(activation=activation, filter_size=filter_size, num_filters=num_filters, pooling_size=pooling_size, weights_init=Uniform(width=0.1), biases_init=Uniform(width=0.01), name="layer_0") filter_size = (3, 3) activation = Rectifier().apply num_filters = 20 layer1 = ConvolutionalLayer(activation=activation, filter_size=filter_size, num_filters=num_filters, pooling_size=pooling_size, weights_init=Uniform(width=0.1), biases_init=Uniform(width=0.01), name="layer_1") conv_layers = [layer0, layer1] convnet = ConvolutionalSequence(conv_layers, num_channels= 1, image_size=(28, 28)) convnet.initialize() output_dim = np.prod(convnet.get_dim('output')) mlp = MLP(activations=[Identity()], dims=[output_dim, 10], weights_init=Uniform(width=0.1), biases_init=Uniform(width=0.01), name="layer_2") mlp.initialize() classifier = Classifier(convnet, mlp) classifier.initialize() return classifier
def build_mlp(features_car_cat, features_car_int, features_nocar_cat, features_nocar_int, features_cp, features_hascar, means, labels): prediction, _, _, _, = \ build_mlp_onlyloc(features_car_cat, features_car_int, features_nocar_cat, features_nocar_int, features_cp, features_hascar, means, labels) mlp_crm = MLP(activations=[None], dims=[1, 1], weights_init=IsotropicGaussian(.1), biases_init=Constant(0), name='mlp_crm') mlp_crm.initialize() crm = features_nocar_int[:, 0][:, None] prediction = prediction * mlp_crm.apply(crm) cost = MAPECost().apply(labels, prediction) cg = ComputationGraph(cost) input_var = VariableFilter(roles=[INPUT])(cg.variables) print input_var cg_dropout = apply_dropout(cg, [input_var[7], input_var[5]], .4) cost_dropout = cg_dropout.outputs[0] return prediction, cost_dropout, cg_dropout.parameters, cost
def build_mlp(features_int, features_cat, labels, labels_mean): inputs = tensor.concatenate([features_int, features_cat], axis=1) mlp = MLP(activations=[Rectifier(), Rectifier(), Rectifier(), None], dims=[337, 800, 1200, 1], weights_init=IsotropicGaussian(), biases_init=Constant(1)) mlp.initialize() prediction = mlp.apply(inputs) cost = MAPECost().apply(prediction, labels, labels_mean) cg = ComputationGraph(cost) #cg_dropout0 = apply_dropout(cg, [VariableFilter(roles=[INPUT])(cg.variables)[1]], .2) cg_dropout1 = apply_dropout(cg, [ VariableFilter(roles=[OUTPUT])(cg.variables)[1], VariableFilter(roles=[OUTPUT])(cg.variables)[3], VariableFilter(roles=[OUTPUT])(cg.variables)[5] ], .2) cost_dropout1 = cg_dropout1.outputs[0] return cost_dropout1, cg_dropout1.parameters, cost #cost, cg.parameters, cost #
def create_vae(x=None, batch=batch_size): x = T.matrix('features') if x is None else x x = x / 255. encoder = MLP( activations=[Rectifier(), Logistic()], dims=[img_dim**2, hidden_dim, 2*latent_dim], weights_init=IsotropicGaussian(std=0.01, mean=0), biases_init=Constant(0.01), name='encoder' ) encoder.initialize() z_param = encoder.apply(x) z_mean, z_log_std = z_param[:,latent_dim:], z_param[:,:latent_dim] z = Sampling(theano_seed=seed).apply([z_mean, z_log_std], batch=batch_size) decoder = MLP( activations=[Rectifier(), Logistic()], dims=[latent_dim, hidden_dim, img_dim**2], weights_init=IsotropicGaussian(std=0.01, mean=0), biases_init=Constant(0.01), name='decoder' ) decoder.initialize() x_reconstruct = decoder.apply(z) cost = VAEloss().apply(x, x_reconstruct, z_mean, z_log_std) cost.name = 'vae_cost' return cost
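# A minimal, assumed training sketch (not part of the original module) showing
# how the VAE cost returned by create_vae() above could be optimized with Blocks
# on MNIST; the stream wiring and optimizer choice here are illustrative only.
from fuel.datasets import MNIST
from fuel.streams import DataStream
from fuel.schemes import ShuffledScheme
from fuel.transformers import Flatten
from blocks.algorithms import GradientDescent, Adam
from blocks.graph import ComputationGraph
from blocks.main_loop import MainLoop
from blocks.extensions import FinishAfter, Printing

vae_cost = create_vae()
cg = ComputationGraph(vae_cost)
mnist = MNIST(("train",))
stream = Flatten(
    DataStream.default_stream(
        mnist, iteration_scheme=ShuffledScheme(mnist.num_examples, 100)),
    which_sources=('features',))
algorithm = GradientDescent(cost=vae_cost, parameters=cg.parameters,
                            step_rule=Adam())
MainLoop(algorithm, stream,
         extensions=[FinishAfter(after_n_epochs=5), Printing()]).run()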
def create_base_model(self, x, y, input_dim, interim_dim=30): # Create the output of the MLP mlp = MLP([Tanh(), Tanh(), Tanh()], [input_dim, 60, 60, interim_dim], weights_init=IsotropicGaussian(0.001), biases_init=Constant(0)) mlp.initialize() inter = mlp.apply(x) fine_tuner = MLP([Logistic()], [interim_dim, 1], weights_init=IsotropicGaussian(0.001), biases_init=Constant(0)) fine_tuner.initialize() probs = fine_tuner.apply(inter) #sq_err = BinaryCrossEntropy() err = T.sqr(y.flatten() - probs.flatten()) # cost = T.mean(err * y.flatten() * (1 - self.p) + err * # (1 - y.flatten()) * self.p) cost = T.mean(err) #cost = sq_err.apply(probs.flatten(), y.flatten()) # cost = T.mean(y.flatten() * T.log(probs.flatten()) + # (1 - y.flatten()) * T.log(1 - probs.flatten())) cost.name = 'cost' pred_out = probs > 0.5 mis_cost = T.sum(T.neq(y.flatten(), pred_out.flatten())) mis_cost.name = 'MisclassificationRate' return mlp, fine_tuner, cost, mis_cost
def create_model(self, x, y, input_dim, tol=10e-5): # Create the output of the MLP mlp = MLP( [Rectifier(), Rectifier(), Logistic()], [input_dim, 100, 100, 1], weights_init=IsotropicGaussian(0.01), biases_init=Constant(0)) mlp.initialize() probs = mlp.apply(x) y = y.dimshuffle(0, 'x') # Create the if-else cost function true_p = (T.sum(y * probs) + tol) * 1.0 / (T.sum(y) + tol) true_n = (T.sum((1 - y) * (1 - probs)) + tol) * \ 1.0 / (T.sum(1 - y) + tol) #p = (T.sum(y) + tol) / (y.shape[0] + tol) theta = (1 - self.p) / self.p numerator = (1 + self.beta**2) * true_p denominator = self.beta**2 + theta + true_p - theta * true_n Fscore = numerator / denominator cost = -1 * Fscore cost.name = "cost" return mlp, cost, probs
def test_pylearn2_training(): # Construct the model mlp = MLP(activations=[Sigmoid(), Sigmoid()], dims=[784, 100, 784], weights_init=IsotropicGaussian(), biases_init=Constant(0.01)) mlp.initialize() cost = SquaredError() block_cost = BlocksCost(cost) block_model = BlocksModel(mlp, (VectorSpace(dim=784), 'features')) # Load the data rng = numpy.random.RandomState(14) train_dataset = random_dense_design_matrix(rng, 1024, 784, 10) valid_dataset = random_dense_design_matrix(rng, 1024, 784, 10) # Silence Pylearn2's logger logger = logging.getLogger(pylearn2.__name__) logger.setLevel(logging.ERROR) # Training algorithm sgd = SGD(learning_rate=0.01, cost=block_cost, batch_size=128, monitoring_dataset=valid_dataset) train = Train(train_dataset, block_model, algorithm=sgd) train.main_loop(time_budget=3)
def main(save_to, num_batches, continue_=False): mlp = MLP([Tanh(), Identity()], [1, 10, 1], weights_init=IsotropicGaussian(0.01), biases_init=Constant(0), seed=1) mlp.initialize() x = tensor.vector('numbers') y = tensor.vector('roots') cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None])) cost.name = "cost" main_loop = MainLoop( GradientDescent(cost=cost, params=ComputationGraph(cost).parameters, step_rule=Scale(learning_rate=0.001)), get_data_stream(range(100)), model=Model(cost), extensions=([LoadFromDump(save_to)] if continue_ else []) + [ Timing(), FinishAfter(after_n_batches=num_batches), DataStreamMonitoring( [cost], get_data_stream(range(100, 200)), prefix="test"), TrainingDataMonitoring([cost], after_epoch=True), Dump(save_to), Printing() ]) main_loop.run() return main_loop
def main(save_to, num_batches, continue_=False): mlp = MLP([Tanh(), Identity()], [1, 10, 1], weights_init=IsotropicGaussian(0.01), biases_init=Constant(0), seed=1) mlp.initialize() x = tensor.vector('numbers') y = tensor.vector('roots') cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None])) cost.name = "cost" main_loop = MainLoop( GradientDescent( cost=cost, params=ComputationGraph(cost).parameters, step_rule=Scale(learning_rate=0.001)), get_data_stream(range(100)), model=Model(cost), extensions=([LoadFromDump(save_to)] if continue_ else []) + [Timing(), FinishAfter(after_n_batches=num_batches), DataStreamMonitoring( [cost], get_data_stream(range(100, 200)), prefix="test"), TrainingDataMonitoring([cost], after_epoch=True), Dump(save_to), Printing()]) main_loop.run() return main_loop
def build_mlp(features_car_cat, features_car_int, features_nocar_cat, features_nocar_int, features_cp, features_hascar, means, labels): features = tensor.concatenate([ features_hascar, means['cp'][features_cp[:, 0]], means['dep'][features_cp[:, 1]] ], axis=1) mlp = MLP(activations=[Rectifier(), Rectifier(), None], dims=[5, 50, 50, 1], weights_init=IsotropicGaussian(.1), biases_init=Constant(0), name='mlp') mlp.initialize() prediction = mlp.apply(features) cost = MAPECost().apply(labels, prediction) cg = ComputationGraph(cost) input_var = VariableFilter(roles=[INPUT])(cg.variables) print input_var cg_dropout1 = apply_dropout(cg, [input_var[3], input_var[5]], .4) cost_dropout1 = cg_dropout1.outputs[0] return prediction, cost_dropout1, cg_dropout1.parameters, cost
def test_pylearn2_training(): # Construct the model mlp = MLP(activations=[Sigmoid(), Sigmoid()], dims=[784, 100, 784], weights_init=IsotropicGaussian(), biases_init=Constant(0.01)) mlp.initialize() cost = SquaredError() # Load the data rng = numpy.random.RandomState(14) train_dataset = random_dense_design_matrix(rng, 1024, 784, 10) valid_dataset = random_dense_design_matrix(rng, 1024, 784, 10) x = tensor.matrix('features') block_cost = Pylearn2Cost(cost.apply(x, mlp.apply(x))) block_model = Pylearn2Model(mlp) # Silence Pylearn2's logger logger = logging.getLogger(pylearn2.__name__) logger.setLevel(logging.ERROR) # Training algorithm sgd = SGD(learning_rate=0.01, cost=block_cost, batch_size=128, monitoring_dataset=valid_dataset) train = Pylearn2Train(train_dataset, block_model, algorithm=sgd) train.main_loop(time_budget=3)
def setupNN(NNParam): NNWidth = NNParam['NNWidth'] WeightStdDev = NNParam['WeightStdDev'] L2Weight = NNParam['L2Weight'] DropOutProb = NNParam['DropOutProb'] InitialLearningRate = NNParam['InitialLearningRate'] x = theano.tensor.concatenate([x0, x1, x2, x3], axis=1) mlp = MLP(activations=[Rectifier(), Rectifier(), Rectifier(), Rectifier(), Rectifier()], dims=[69*4, NNWidth, NNWidth, NNWidth, NNWidth, 100], weights_init=IsotropicGaussian(WeightStdDev), biases_init=Constant(0)) x_forward = mlp.apply(x) mlp_sm = MLP(activations=[None], dims=[100, 39], weights_init=IsotropicGaussian(WeightStdDev), biases_init=Constant(0)) y_hat_b = Softmax().apply(mlp_sm.apply(x_forward)) mlp.initialize() mlp_sm.initialize() cg = blocks.graph.ComputationGraph(y_hat_b) parameters = list() for p in cg.parameters: parameters.append(p) weights = VariableFilter(roles=[blocks.roles.WEIGHT])(cg.variables) cg_dropout = blocks.graph.apply_dropout(cg,[weights[3]] , DropOutProb) y_hat_b_do = cg_dropout.outputs[0] pred_b = theano.tensor.argmax(cg.outputs[0],axis=1) err_b = theano.tensor.mean(theano.tensor.eq(pred_b,y_b)) cW = 0 for W in weights: cW += (W**2).sum() cost = theano.tensor.mean(theano.tensor.nnet.categorical_crossentropy(y_hat_b_do, y_b)) + cW*L2Weight Learning_Rate_Decay = numpy.float32(0.98) learning_rate_theano = theano.shared(numpy.float32(InitialLearningRate), name='learning_rate') learning_rate_update = theano.function(inputs=[],outputs=learning_rate_theano,updates=[(learning_rate_theano,learning_rate_theano*Learning_Rate_Decay)]) update_proc = momentum_sgd(cost,parameters,0.8, learning_rate_theano) #train training_proc = theano.function( inputs=[shuffIdx], outputs=cost, updates=update_proc, givens={x0:tX[theano.tensor.flatten(shuffIdx[:,0])], x1:tX[theano.tensor.flatten(shuffIdx[:,1])], x2:tX[theano.tensor.flatten(shuffIdx[:,2])], x3:tX[theano.tensor.flatten(shuffIdx[:,3])], y_b:tYb[theano.tensor.flatten(shuffIdx[:,1])]}) #test test_on_testing_proc = theano.function( inputs=[shuffIdx], outputs=[err_b], givens={x0:vX[shuffIdx[:,0]],x1:vX[shuffIdx[:,1]],x2:vX[shuffIdx[:,2]],x3:vX[shuffIdx[:,3]],y_b:vYb[shuffIdx[:,1]]}) test_on_training_proc = theano.function( inputs=[shuffIdx], outputs=[err_b], givens={x0:tX[shuffIdx[:,0]],x1:tX[shuffIdx[:,1]],x2:tX[shuffIdx[:,2]],x3:tX[shuffIdx[:,3]],y_b:tYb[shuffIdx[:,1]]}) forward_proc = theano.function(inputs=[x0,x1,x2,x3],outputs=[x_forward]) return (learning_rate_update, training_proc, test_on_testing_proc,test_on_training_proc,forward_proc)
def setup_model(configs): tensor5 = theano.tensor.TensorType(config.floatX, (False,) * 5) # shape: T x B x C x X x Y input_ = tensor5('features') # shape: B x Classes target = T.lmatrix('targets') model = LSTMAttention( configs, weights_init=Glorot(), biases_init=Constant(0)) model.initialize() (h, c, location, scale, patch, downn_sampled_input, conved_part_1, conved_part_2, pre_lstm) = model.apply(input_) classifier = MLP( [Rectifier(), Logistic()], configs['classifier_dims'], weights_init=Glorot(), biases_init=Constant(0)) classifier.initialize() probabilities = classifier.apply(h[-1]) cost = BinaryCrossEntropy().apply(target, probabilities) cost.name = 'CE' error_rate = MisclassificationRate().apply(target, probabilities) error_rate.name = 'ER' model.cost = cost if configs['load_pretrained']: blocks_model = Model(model.cost) all_params = blocks_model.parameters with open('VGG_CNN_params.npz') as f: loaded = np.load(f) all_conv_params = loaded.keys() for param in all_params: if param.name in loaded.keys(): assert param.get_value().shape == loaded[param.name].shape param.set_value(loaded[param.name]) all_conv_params.pop(all_conv_params.index(param.name)) print "the following parameters did not match: " + str(all_conv_params) if configs['test_model']: cg = ComputationGraph(model.cost) f = theano.function(cg.inputs, [model.cost], on_unused_input='ignore', allow_input_downcast=True) data = np.random.randn(10, 40, 3, 224, 224) targs = np.random.randn(40, 101) f(data, targs) print "Test passed! ;)" model.monitorings = [cost, error_rate] return model
def task_ID_layers(x, recurrent_in_size): mlp = MLP([Rectifier()] * (len(task_ID_FF_dims)-1), task_ID_FF_dims, name='task_ID_mlp', weights_init=Uniform(width=.2), biases_init=Constant(0)) mlp.push_initialization_config() mlp.initialize() out_size = task_ID_FF_dims[-1] + recurrent_in_size - len(game_tasks) zero_padded_task_IDs = T.concatenate([x[:,:,-len(game_tasks):], T.zeros((x.shape[0], x.shape[1], task_ID_FF_dims[0] - len(game_tasks)))], axis=2) mlp_out = mlp.apply(zero_padded_task_IDs) task_ID_out = T.concatenate([x[:,:,:-len(game_tasks)]] + [mlp_out], axis=2) return task_ID_out, out_size
def test_snapshot(): x = tensor.matrix('x') linear = MLP([Identity(), Identity()], [10, 10, 10], weights_init=Constant(1), biases_init=Constant(2)) linear.initialize() y = linear.apply(x) cg = ComputationGraph(y) snapshot = cg.get_snapshot(dict(x=numpy.zeros((1, 10), dtype=floatX))) assert len(snapshot) == 14
def test_serialization(): # Create a simple brick with two parameters mlp = MLP(activations=[None, None], dims=[10, 10, 10], weights_init=Constant(1.), use_bias=False) mlp.initialize() W = mlp.linear_transformations[1].W W.set_value(W.get_value() * 2) # Check the data using numpy.load with NamedTemporaryFile(delete=False) as f: dump(mlp, f) numpy_data = numpy.load(f.name) assert set(numpy_data.keys()) == \ set(['mlp-linear_0.W', 'mlp-linear_1.W', 'pkl']) assert_allclose(numpy_data['mlp-linear_0.W'], numpy.ones((10, 10))) assert numpy_data['mlp-linear_0.W'].dtype == theano.config.floatX # Ensure that it can be unpickled mlp = load(f.name) assert_allclose(mlp.linear_transformations[1].W.get_value(), numpy.ones((10, 10)) * 2) # Ensure that only parameters are saved as NPY files mlp.random_data = numpy.random.rand(10) with NamedTemporaryFile(delete=False) as f: dump(mlp, f) numpy_data = numpy.load(f.name) assert set(numpy_data.keys()) == \ set(['mlp-linear_0.W', 'mlp-linear_1.W', 'pkl']) # Ensure that parameters can be loaded with correct names parameter_values = load_parameter_values(f.name) assert set(parameter_values.keys()) == \ set(['/mlp/linear_0.W', '/mlp/linear_1.W']) # Ensure that duplicate names are dealt with for child in mlp.children: child.name = 'linear' with NamedTemporaryFile(delete=False) as f: dump(mlp, f) numpy_data = numpy.load(f.name) assert set(numpy_data.keys()) == \ set(['mlp-linear.W', 'mlp-linear.W_2', 'pkl']) # Ensure warnings are raised when __main__ namespace objects are dumped foo.__module__ = '__main__' import __main__ __main__.__dict__['foo'] = foo mlp.foo = foo with NamedTemporaryFile(delete=False) as f: with warnings.catch_warnings(record=True) as w: dump(mlp, f) assert len(w) == 1 assert '__main__' in str(w[-1].message)
def create_model(self): x = self.x input_dim = self.input_dim mlp = MLP([Logistic(), Logistic(), Tanh()], [input_dim, 100, 100, 1], weights_init=IsotropicGaussian(0.001), biases_init=Constant(0)) mlp.initialize() self.mlp = mlp probs = mlp.apply(x) return probs
def test_fully_layer(): batch_size=2 x = T.tensor4(); y = T.ivector() V = 200 layer_conv = Convolutional(filter_size=(5,5),num_filters=V, name="toto", weights_init=IsotropicGaussian(0.01), biases_init=Constant(0.0)) # try with no bias activation = Rectifier() pool = MaxPooling(pooling_size=(2,2)) convnet = ConvolutionalSequence([layer_conv, activation, pool], num_channels=15, image_size=(10,10), name="conv_section") convnet.push_allocation_config() convnet.initialize() output=convnet.apply(x) batch_size=output.shape[0] output_dim=np.prod(convnet.get_dim('output')) result_conv = output.reshape((batch_size, output_dim)) mlp=MLP(activations=[Rectifier().apply], dims=[output_dim, 10], weights_init=IsotropicGaussian(0.01), biases_init=Constant(0.0)) mlp.initialize() output=mlp.apply(result_conv) cost = T.mean(Softmax().categorical_cross_entropy(y.flatten(), output)) cg = ComputationGraph(cost) W = VariableFilter(roles=[WEIGHT])(cg.variables) B = VariableFilter(roles=[BIAS])(cg.variables) W = W[0]; b = B[0] inputs_fully = VariableFilter(roles=[INPUT], bricks=[Linear])(cg) outputs_fully = VariableFilter(roles=[OUTPUT], bricks=[Linear])(cg) var_input=inputs_fully[0] var_output=outputs_fully[0] [d_W,d_S,d_b] = T.grad(cost, [W, var_output, b]) d_b = d_b.dimshuffle(('x',0)) d_p = T.concatenate([d_W, d_b], axis=0) x_value = 1e3*np.random.ranf((2,15, 10, 10)) f = theano.function([x,y], [var_input, d_S, d_p], allow_input_downcast=True, on_unused_input='ignore') A, B, C= f(x_value, [5, 0]) A = np.concatenate([A, np.ones((2,1))], axis=1) print 'A', A.shape print 'B', B.shape print 'C', C.shape print lin.norm(C - np.dot(np.transpose(A), B), 'fro') return
def main(save_to, num_epochs, bokeh=False): mlp = MLP([Tanh(), Softmax()], [784, 100, 10], weights_init=IsotropicGaussian(0.01), biases_init=Constant(0)) mlp.initialize() x = tensor.matrix('features') y = tensor.lmatrix('targets') probs = mlp.apply(x) cost = CategoricalCrossEntropy().apply(y.flatten(), probs) error_rate = MisclassificationRate().apply(y.flatten(), probs) cg = ComputationGraph([cost]) W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables) cost = cost + .00005 * (W1 ** 2).sum() + .00005 * (W2 ** 2).sum() cost.name = 'final_cost' mnist_train = MNIST("train") mnist_test = MNIST("test") algorithm = GradientDescent( cost=cost, params=cg.parameters, step_rule=Scale(learning_rate=0.1)) extensions = [Timing(), FinishAfter(after_n_epochs=num_epochs), DataStreamMonitoring( [cost, error_rate], DataStream(mnist_test, iteration_scheme=SequentialScheme( mnist_test.num_examples, 500)), prefix="test"), TrainingDataMonitoring( [cost, error_rate, aggregation.mean(algorithm.total_gradient_norm)], prefix="train", after_epoch=True), Checkpoint(save_to), Printing()] if bokeh: extensions.append(Plot( 'MNIST example', channels=[ ['test_final_cost', 'test_misclassificationrate_apply_error_rate'], ['train_total_gradient_norm']])) main_loop = MainLoop( algorithm, DataStream(mnist_train, iteration_scheme=SequentialScheme( mnist_train.num_examples, 50)), model=Model(cost), extensions=extensions) main_loop.run()
def create_model(self): input_dim = self.input_dim x = self.x y = self.y p = self.p mask = self.mask hidden_dim = self.hidden_dim embedding_dim = self.embedding_dim lookup = LookupTable(self.dict_size, embedding_dim, weights_init=IsotropicGaussian(0.001), name='LookupTable') x_to_h = Linear(embedding_dim, hidden_dim * 4, name='x_to_h', weights_init=IsotropicGaussian(0.001), biases_init=Constant(0.0)) lstm = LSTM(hidden_dim, name='lstm', weights_init=IsotropicGaussian(0.001), biases_init=Constant(0.0)) h_to_o = MLP([Logistic()], [hidden_dim, 1], weights_init=IsotropicGaussian(0.001), biases_init=Constant(0), name='h_to_o') lookup.initialize() x_to_h.initialize() lstm.initialize() h_to_o.initialize() embed = lookup.apply(x).reshape( (x.shape[0], x.shape[1], self.embedding_dim)) embed.name = "embed_vec" x_transform = x_to_h.apply(embed.transpose(1, 0, 2)) x_transform.name = "Transformed X" self.lookup = lookup self.x_to_h = x_to_h self.lstm = lstm self.h_to_o = h_to_o #if mask is None: h, c = lstm.apply(x_transform) #else: #h, c = lstm.apply(x_transform, mask=mask) h.name = "hidden_state" c.name = "cell state" # only values of hidden units of the last timeframe are used for # the classification indices = T.sum(mask, axis=0) - 1 rel_hid = h[indices, T.arange(h.shape[1])] out = self.h_to_o.apply(rel_hid) probs = out return probs
def test_add_to_dump(): # Create a simple MLP to dump. mlp = MLP(activations=[None, None], dims=[10, 10, 10], weights_init=Constant(1.), use_bias=False) mlp.initialize() W = mlp.linear_transformations[1].W W.set_value(W.get_value() * 2) mlp2 = MLP(activations=[None, None], dims=[10, 10, 10], weights_init=Constant(1.), use_bias=False, name='mlp2') mlp2.initialize() # Ensure that adding to dump is working. with NamedTemporaryFile(delete=False) as f: dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W]) with open(f.name, 'rb+') as ff: add_to_dump(mlp.children[0], ff, 'child_0', parameters=[mlp.children[0].W]) add_to_dump(mlp.children[1], ff, 'child_1') with tarfile.open(f.name, 'r') as tarball: assert set(tarball.getnames()) == set( ['_pkl', '_parameters', 'child_0', 'child_1']) # Ensure that we can load any object from the tarball. with open(f.name, 'rb') as ff: saved_children_0 = load(ff, 'child_0') saved_children_1 = load(ff, 'child_1') assert_allclose(saved_children_0.W.get_value(), numpy.ones((10, 10))) assert_allclose(saved_children_1.W.get_value(), numpy.ones((10, 10)) * 2) # Check the error if using a reserved name. with open(f.name, 'rb+') as ff: assert_raises(ValueError, add_to_dump, *[mlp.children[0], ff, '_pkl']) # Check the error if saving an object with other parameters with open(f.name, 'rb+') as ff: assert_raises( ValueError, add_to_dump, *[mlp2, ff, 'mlp2'], **dict(parameters=[mlp2.children[0].W, mlp2.children[1].W])) # Check the warning if adding to a dump with no parameters with NamedTemporaryFile(delete=False) as f: dump(mlp, f) with open(f.name, 'rb+') as ff: assert_raises( ValueError, add_to_dump, *[mlp2, ff, 'mlp2'], **dict(parameters=[mlp2.children[0].W, mlp2.children[1].W]))
def build_model(self, hidden_dim): board_input = T.vector('input') mlp = MLP(activations=[LeakyRectifier(0.1), LeakyRectifier(0.1)], dims=[9, hidden_dim, 9], weights_init=IsotropicGaussian(0.00001), biases_init=Constant(0.01)) output = mlp.apply(board_input) masked_output = Softmax().apply(output * T.eq(board_input, 0) * 1000) mlp.initialize() cost, chosen = self.get_cost(masked_output) return board_input, mlp, cost, chosen, output
def test_serialization(): # Create a simple MLP to dump. mlp = MLP(activations=[None, None], dims=[10, 10, 10], weights_init=Constant(1.), use_bias=False) mlp.initialize() W = mlp.linear_transformations[1].W W.set_value(W.get_value() * 2) # Ensure warnings are raised when __main__ namespace objects are dumped. foo.__module__ = '__main__' import __main__ __main__.__dict__['foo'] = foo mlp.foo = foo with NamedTemporaryFile(delete=False) as f: with warnings.catch_warnings(record=True) as w: dump(mlp.foo, f) assert len(w) == 1 assert '__main__' in str(w[-1].message) # Check the parameters. with NamedTemporaryFile(delete=False) as f: dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W]) with open(f.name, 'rb') as ff: numpy_data = load_parameters(ff) assert set(numpy_data.keys()) == \ set(['/mlp/linear_0.W', '/mlp/linear_1.W']) assert_allclose(numpy_data['/mlp/linear_0.W'], numpy.ones((10, 10))) assert numpy_data['/mlp/linear_0.W'].dtype == theano.config.floatX # Ensure that it can be unpickled. with open(f.name, 'rb') as ff: mlp = load(ff) assert_allclose(mlp.linear_transformations[1].W.get_value(), numpy.ones((10, 10)) * 2) # Ensure that duplicate names are dealt with. for child in mlp.children: child.name = 'linear' with NamedTemporaryFile(delete=False) as f: dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W]) with open(f.name, 'rb') as ff: numpy_data = load_parameters(ff) assert set(numpy_data.keys()) == \ set(['/mlp/linear.W', '/mlp/linear.W_2']) # Check when we don't dump the main object. with NamedTemporaryFile(delete=False) as f: dump(None, f, parameters=[mlp.children[0].W, mlp.children[1].W]) with tarfile.open(f.name, 'r') as tarball: assert set(tarball.getnames()) == set(['_parameters'])
def apply(self, input_, target): mlp = MLP(self.non_lins, self.dims, weights_init=IsotropicGaussian(0.01), biases_init=Constant(0), name=self.name) mlp.initialize() probs = mlp.apply(T.flatten(input_, outdim=2)) probs.name = 'probs' cost = CategoricalCrossEntropy().apply(target.flatten(), probs) cost.name = "CE" self.outputs = {} self.outputs['probs'] = probs self.outputs['cost'] = cost
def main(save_to, num_epochs): mlp = MLP([Tanh(), Softmax()], [784, 100, 10], weights_init=IsotropicGaussian(0.01), biases_init=Constant(0)) mlp.initialize() x = tensor.matrix('features') y = tensor.lmatrix('targets') probs = mlp.apply(x) cost = CategoricalCrossEntropy().apply(y.flatten(), probs) error_rate = MisclassificationRate().apply(y.flatten(), probs) cg = ComputationGraph([cost]) W1, W2 = VariableFilter(roles=[WEIGHTS])(cg.variables) cost = cost + .00005 * (W1**2).sum() + .00005 * (W2**2).sum() cost.name = 'final_cost' mnist_train = MNIST("train") mnist_test = MNIST("test") algorithm = GradientDescent(cost=cost, step_rule=SteepestDescent(learning_rate=0.1)) main_loop = MainLoop( mlp, DataStream(mnist_train, iteration_scheme=SequentialScheme(mnist_train.num_examples, 50)), algorithm, extensions=[ Timing(), FinishAfter(after_n_epochs=num_epochs), DataStreamMonitoring([cost, error_rate], DataStream(mnist_test, iteration_scheme=SequentialScheme( mnist_test.num_examples, 500)), prefix="test"), TrainingDataMonitoring([ cost, error_rate, aggregation.mean(algorithm.total_gradient_norm) ], prefix="train", after_every_epoch=True), SerializeMainLoop(save_to), Plot('MNIST example', channels=[[ 'test_final_cost', 'test_misclassificationrate_apply_error_rate' ], ['train_total_gradient_norm']]), Printing() ]) main_loop.run()
def test_add_to_dump(): # Create a simple MLP to dump. mlp = MLP(activations=[None, None], dims=[10, 10, 10], weights_init=Constant(1.), use_bias=False) mlp.initialize() W = mlp.linear_transformations[1].W W.set_value(W.get_value() * 2) mlp2 = MLP(activations=[None, None], dims=[10, 10, 10], weights_init=Constant(1.), use_bias=False, name='mlp2') mlp2.initialize() # Ensure that adding to dump is working. with NamedTemporaryFile(delete=False) as f: dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W]) with open(f.name, 'rb+') as ff: add_to_dump(mlp.children[0], ff, 'child_0', parameters=[mlp.children[0].W]) add_to_dump(mlp.children[1], ff, 'child_1') with tarfile.open(f.name, 'r') as tarball: assert set(tarball.getnames()) == set(['_pkl', '_parameters', 'child_0', 'child_1']) # Ensure that we can load any object from the tarball. with open(f.name, 'rb') as ff: saved_children_0 = load(ff, 'child_0') saved_children_1 = load(ff, 'child_1') assert_allclose(saved_children_0.W.get_value(), numpy.ones((10, 10))) assert_allclose(saved_children_1.W.get_value(), numpy.ones((10, 10)) * 2) # Check the error if using a reserved name. with open(f.name, 'rb+') as ff: assert_raises(ValueError, add_to_dump, *[mlp.children[0], ff, '_pkl']) # Check the error if saving an object with other parameters with open(f.name, 'rb+') as ff: assert_raises(ValueError, add_to_dump, *[mlp2, ff, 'mlp2'], **dict(parameters=[mlp2.children[0].W, mlp2.children[1].W])) # Check the warning if adding to a dump with no parameters with NamedTemporaryFile(delete=False) as f: dump(mlp, f) with open(f.name, 'rb+') as ff: assert_raises(ValueError, add_to_dump, *[mlp2, ff, 'mlp2'], **dict(parameters=[mlp2.children[0].W, mlp2.children[1].W]))
def setup_model(): # shape: T x B x F input_ = T.tensor3('features') # shape: B target = T.lvector('targets') model = LSTMAttention(dim=256, mlp_hidden_dims=[256, 4], batch_size=100, image_shape=(64, 64), patch_shape=(16, 16), weights_init=Glorot(), biases_init=Constant(0)) model.initialize() h, c, location, scale = model.apply(input_) classifier = MLP([Rectifier(), Softmax()], [256 * 2, 200, 10], weights_init=Glorot(), biases_init=Constant(0)) model.h = h model.c = c model.location = location model.scale = scale classifier.initialize() probabilities = classifier.apply(T.concatenate([h[-1], c[-1]], axis=1)) cost = CategoricalCrossEntropy().apply(target, probabilities) error_rate = MisclassificationRate().apply(target, probabilities) model.cost = cost location_x_0_avg = T.mean(location[0, :, 0]) location_x_0_avg.name = 'location_x_0_avg' location_x_10_avg = T.mean(location[10, :, 0]) location_x_10_avg.name = 'location_x_10_avg' location_x_20_avg = T.mean(location[-1, :, 0]) location_x_20_avg.name = 'location_x_20_avg' scale_x_0_avg = T.mean(scale[0, :, 0]) scale_x_0_avg.name = 'scale_x_0_avg' scale_x_10_avg = T.mean(scale[10, :, 0]) scale_x_10_avg.name = 'scale_x_10_avg' scale_x_20_avg = T.mean(scale[-1, :, 0]) scale_x_20_avg.name = 'scale_x_20_avg' monitorings = [ error_rate, location_x_0_avg, location_x_10_avg, location_x_20_avg, scale_x_0_avg, scale_x_10_avg, scale_x_20_avg ] model.monitorings = monitorings return model
def main(save_to, num_epochs, batch_size): mlp = MLP([Tanh(), Tanh(), Tanh(), Softmax()], [3072, 4096, 1024, 512, 10], weights_init=IsotropicGaussian(0.01), biases_init=Constant(0)) mlp.initialize() x = tt.tensor4('features', dtype='float32') y = tt.vector('label', dtype='int32') probs = mlp.apply(x.reshape((-1, 3072))) cost = CategoricalCrossEntropy().apply(y, probs) error_rate = MisclassificationRate().apply(y, probs) cg = ComputationGraph([cost]) ws = VariableFilter(roles=[WEIGHT])(cg.variables) cost = cost + .00005 * sum(([(w**2).sum() for w in ws])) cost.name = 'final_cost' train_dataset = Cifar10Dataset(data_dir='/home/belohlavek/data/cifar10', is_train=True) valid_dataset = Cifar10Dataset(data_dir='/home/belohlavek/data/cifar10', is_train=False) train_stream = train_dataset.get_stream(batch_size) valid_stream = valid_dataset.get_stream(batch_size) algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Adam(learning_rate=0.001)) extensions = [ Timing(), LogExtension('/home/belohlavek/ALI/mlp.log'), FinishAfter(after_n_epochs=num_epochs), DataStreamMonitoring([cost, error_rate], valid_stream, prefix="test"), TrainingDataMonitoring([ cost, error_rate, aggregation.mean(algorithm.total_gradient_norm) ], prefix="train", after_epoch=True), Checkpoint(save_to), Printing() ] main_loop = MainLoop(algorithm, train_stream, model=Model(cost), extensions=extensions) main_loop.run()
def create_model(): """Create the deep autoencoder model with Blocks, and load MNIST.""" mlp = MLP(activations=[Logistic(), Logistic(), Logistic(), None, Logistic(), Logistic(), Logistic(), Logistic()], dims=[784, 1000, 500, 250, 30, 250, 500, 1000, 784], weights_init=Sparse(15, IsotropicGaussian()), biases_init=Constant(0)) mlp.initialize() x = tensor.matrix('features') x_hat = mlp.apply(tensor.flatten(x, outdim=2)) squared_err = SquaredError().apply(tensor.flatten(x, outdim=2), x_hat) cost = BinaryCrossEntropy().apply(tensor.flatten(x, outdim=2), x_hat) return x, cost, squared_err
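# A small usage sketch (assumed, not from the original file): compile the graph
# built by create_model() above and evaluate both costs on a random batch.
import numpy
import theano
x, cost, squared_err = create_model()
f = theano.function([x], [cost, squared_err])
batch = numpy.random.rand(16, 784).astype(theano.config.floatX)
cross_entropy_value, squared_error_value = f(batch)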
def create_model(self, x, y, input_dim, p): # Create the output of the MLP mlp = MLP([Tanh(), Tanh(), Logistic()], [input_dim, 200, 100, 1], weights_init=IsotropicGaussian(0.01), biases_init=Constant(0)) mlp.initialize() probs = mlp.apply(x).sum() # Create the if-else cost function reward = (probs * y * 1.0) / p + (1 - probs) * (1 - y) * 1.0 / (1 - p) cost = -reward # Negative of reward cost.name = "cost" return mlp, cost
def create_base_model(self, x, y, input_dim): # Create the output of the MLP mlp = MLP([Tanh(), Tanh(), Logistic()], [input_dim, 100, 100, 1], weights_init=IsotropicGaussian(0.001), biases_init=Constant(0)) mlp.initialize() probs = mlp.apply(x) #sq_err = SquaredError() cost = T.mean(T.sqr(y.flatten() - probs.flatten())) cost.name = 'cost' pred_out = probs > 0.5 mis_cost = T.mean(T.neq(y.flatten(), pred_out.flatten())) mis_cost.name = 'MisclassificationRate' return mlp, cost, mis_cost
def setup_model(): # shape: T x B x F input_ = T.tensor3('features') # shape: B target = T.lvector('targets') model = LSTMAttention(dim=256, mlp_hidden_dims=[256, 4], batch_size=100, image_shape=(64, 64), patch_shape=(16, 16), weights_init=Glorot(), biases_init=Constant(0)) model.initialize() h, c, location, scale = model.apply(input_) classifier = MLP([Rectifier(), Softmax()], [256 * 2, 200, 10], weights_init=Glorot(), biases_init=Constant(0)) model.h = h model.c = c model.location = location model.scale = scale classifier.initialize() probabilities = classifier.apply(T.concatenate([h[-1], c[-1]], axis=1)) cost = CategoricalCrossEntropy().apply(target, probabilities) error_rate = MisclassificationRate().apply(target, probabilities) model.cost = cost location_x_0_avg = T.mean(location[0, :, 0]) location_x_0_avg.name = 'location_x_0_avg' location_x_10_avg = T.mean(location[10, :, 0]) location_x_10_avg.name = 'location_x_10_avg' location_x_20_avg = T.mean(location[-1, :, 0]) location_x_20_avg.name = 'location_x_20_avg' scale_x_0_avg = T.mean(scale[0, :, 0]) scale_x_0_avg.name = 'scale_x_0_avg' scale_x_10_avg = T.mean(scale[10, :, 0]) scale_x_10_avg.name = 'scale_x_10_avg' scale_x_20_avg = T.mean(scale[-1, :, 0]) scale_x_20_avg.name = 'scale_x_20_avg' monitorings = [error_rate, location_x_0_avg, location_x_10_avg, location_x_20_avg, scale_x_0_avg, scale_x_10_avg, scale_x_20_avg] model.monitorings = monitorings return model
def setup_model(): # shape: T x B x F input_ = T.tensor3('features') # shape: B target = T.lvector('targets') model = LSTMAttention(dim=500, mlp_hidden_dims=[400, 4], batch_size=100, image_shape=(100, 100), patch_shape=(28, 28), weights_init=Glorot(), biases_init=Constant(0)) model.initialize() h, c, location, scale = model.apply(input_) classifier = MLP([Rectifier(), Softmax()], [500, 100, 10], weights_init=Glorot(), biases_init=Constant(0)) model.h = h classifier.initialize() probabilities = classifier.apply(h[-1]) cost = CategoricalCrossEntropy().apply(target, probabilities) error_rate = MisclassificationRate().apply(target, probabilities) location_x_avg = T.mean(location[:, 0]) location_x_avg.name = 'location_x_avg' location_y_avg = T.mean(location[:, 1]) location_y_avg.name = 'location_y_avg' scale_x_avg = T.mean(scale[:, 0]) scale_x_avg.name = 'scale_x_avg' scale_y_avg = T.mean(scale[:, 1]) scale_y_avg.name = 'scale_y_avg' location_x_std = T.std(location[:, 0]) location_x_std.name = 'location_x_std' location_y_std = T.std(location[:, 1]) location_y_std.name = 'location_y_std' scale_x_std = T.std(scale[:, 0]) scale_x_std.name = 'scale_x_std' scale_y_std = T.std(scale[:, 1]) scale_y_std.name = 'scale_y_std' monitorings = [error_rate, location_x_avg, location_y_avg, scale_x_avg, scale_y_avg, location_x_std, location_y_std, scale_x_std, scale_y_std] return cost, monitorings
def test_load(): # Create a main loop and checkpoint it mlp = MLP(activations=[None], dims=[10, 10], weights_init=Constant(1.), use_bias=False) mlp.initialize() W = mlp.linear_transformations[0].W x = tensor.vector('data') cost = mlp.apply(x).mean() data = numpy.random.rand(10, 10).astype(theano.config.floatX) data_stream = IterableDataset(data).get_example_stream() main_loop = MainLoop(data_stream=data_stream, algorithm=GradientDescent(cost=cost, parameters=[W]), extensions=[ FinishAfter(after_n_batches=5), Checkpoint('myweirdmodel.picklebarrel') ]) main_loop.run() # Load the parameters, log and iteration state old_value = W.get_value() W.set_value(old_value * 2) main_loop = MainLoop(model=Model(cost), data_stream=data_stream, algorithm=GradientDescent(cost=cost, parameters=[W]), extensions=[ Load('myweirdmodel.picklebarrel', load_iteration_state=True, load_log=True) ]) main_loop.extensions[0].main_loop = main_loop main_loop._run_extensions('before_training') assert_allclose(W.get_value(), old_value) # Make sure things work too if the model was never saved before main_loop = MainLoop(model=Model(cost), data_stream=data_stream, algorithm=GradientDescent(cost=cost, parameters=[W]), extensions=[ Load('mynonexisting.picklebarrel', load_iteration_state=True, load_log=True) ]) main_loop.extensions[0].main_loop = main_loop main_loop._run_extensions('before_training')
def test_checkpointing(): # Create a main loop and checkpoint it mlp = MLP(activations=[None], dims=[10, 10], weights_init=Constant(1.), use_bias=False) mlp.initialize() W = mlp.linear_transformations[0].W x = tensor.vector('data') cost = mlp.apply(x).mean() data = numpy.random.rand(10, 10).astype(theano.config.floatX) data_stream = IterableDataset(data).get_example_stream() main_loop = MainLoop( data_stream=data_stream, algorithm=GradientDescent(cost=cost, parameters=[W]), extensions=[FinishAfter(after_n_batches=5), Checkpoint('myweirdmodel.tar', parameters=[W])] ) main_loop.run() # Load it again old_value = W.get_value() W.set_value(old_value * 2) main_loop = MainLoop( model=Model(cost), data_stream=data_stream, algorithm=GradientDescent(cost=cost, parameters=[W]), extensions=[Load('myweirdmodel.tar')] ) main_loop.extensions[0].main_loop = main_loop main_loop._run_extensions('before_training') assert_allclose(W.get_value(), old_value) # Make sure things work too if the model was never saved before main_loop = MainLoop( model=Model(cost), data_stream=data_stream, algorithm=GradientDescent(cost=cost, parameters=[W]), extensions=[Load('mynonexisting.tar')] ) main_loop.extensions[0].main_loop = main_loop main_loop._run_extensions('before_training') # Cleaning if os.path.exists('myweirdmodel.tar'): os.remove('myweirdmodel.tar')
def test_mlp(): x = tensor.matrix() x_val = numpy.random.rand(2, 16).astype(theano.config.floatX) mlp = MLP(activations=[Tanh(), None], dims=[16, 8, 4], weights_init=Constant(1), biases_init=Constant(1)) y = mlp.apply(x) mlp.initialize() assert_allclose( numpy.tanh(x_val.dot(numpy.ones((16, 8))) + numpy.ones((2, 8))).dot( numpy.ones((8, 4))) + numpy.ones((2, 4)), y.eval({x: x_val}), rtol=1e-06) mlp = MLP(activations=[None], weights_init=Constant(1), use_bias=False) mlp.dims = [16, 8] y = mlp.apply(x) mlp.initialize() assert_allclose(x_val.dot(numpy.ones((16, 8))), y.eval({x: x_val}), rtol=1e-06)
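# The second MLP in test_mlp() above relies on Blocks' lazy initialization:
# `dims` can be filled in after construction, as long as it is set before the
# brick is allocated (i.e. before `apply`/`initialize`). A minimal sketch of the
# same pattern (the dimensions below are arbitrary):
from blocks.bricks import MLP, Tanh
from blocks.initialization import Constant
lazy_mlp = MLP(activations=[Tanh()], weights_init=Constant(1),
               biases_init=Constant(0))
lazy_mlp.dims = [4, 2]   # configure the missing hyper-parameter lazily
lazy_mlp.initialize()    # allocation and initialization happen here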
def create_model(self, x, y, input_dim, p): # Create the output of the MLP mlp = MLP( [Rectifier(), Rectifier(), Logistic()], [input_dim, 150, 100, 1], weights_init=IsotropicGaussian(0.01), biases_init=Constant(0)) mlp.initialize() probs = 1 - mlp.apply(x) y = y.dimshuffle(0, 'x') # Create the if-else cost function pos_ex = (y * probs) / p neg_ex = (1 - y) * (1 - probs) / np.float32(1 - p) reward = pos_ex + neg_ex cost = reward # Negative of reward cost.name = "cost" return mlp, cost
def setUp(self): """Create main loop and run it.""" mlp = MLP(activations=[None], dims=[10, 10], weights_init=Constant(1.0), use_bias=False) mlp.initialize() self.W = mlp.linear_transformations[0].W x = tensor.vector("data") cost = mlp.apply(x).mean() data = numpy.random.rand(10, 10).astype(theano.config.floatX) self.data_stream = IterableDataset(data).get_example_stream() self.model = Model(cost) self.algorithm = GradientDescent(cost=cost, parameters=[self.W]) self.main_loop = MainLoop( model=self.model, data_stream=self.data_stream, algorithm=self.algorithm, extensions=[FinishAfter(after_n_batches=5), Checkpoint("myweirdmodel.tar", save_separately=["log"])], ) self.main_loop.run()
def test_mlp_apply(): x = tensor.matrix() x_val = numpy.random.rand(2, 16).astype(theano.config.floatX) mlp = MLP(activations=[Tanh().apply, None], dims=[16, 8, 4], weights_init=Constant(1), biases_init=Constant(1)) y = mlp.apply(x) mlp.initialize() assert_allclose( numpy.tanh(x_val.dot(numpy.ones((16, 8))) + numpy.ones((2, 8))).dot( numpy.ones((8, 4))) + numpy.ones((2, 4)), y.eval({x: x_val}), rtol=1e-06) mlp = MLP(activations=[None], weights_init=Constant(1), use_bias=False) mlp.dims = [16, 8] y = mlp.apply(x) mlp.initialize() assert_allclose(x_val.dot(numpy.ones((16, 8))), y.eval({x: x_val}), rtol=1e-06) assert mlp.rng == mlp.linear_transformations[0].rng
def main(save_to, num_epochs, batch_size): mlp = MLP([Tanh(), Tanh(), Tanh(), Softmax()], [3072, 4096, 1024, 512, 10], weights_init=IsotropicGaussian(0.01), biases_init=Constant(0)) mlp.initialize() x = tt.tensor4('features', dtype='float32') y = tt.vector('label', dtype='int32') probs = mlp.apply(x.reshape((-1,3072))) cost = CategoricalCrossEntropy().apply(y, probs) error_rate = MisclassificationRate().apply(y, probs) cg = ComputationGraph([cost]) ws = VariableFilter(roles=[WEIGHT])(cg.variables) cost = cost + .00005 * sum(([(w**2).sum() for w in ws])) cost.name = 'final_cost' train_dataset = Cifar10Dataset(data_dir='/home/belohlavek/data/cifar10', is_train=True) valid_dataset = Cifar10Dataset(data_dir='/home/belohlavek/data/cifar10', is_train=False) train_stream = train_dataset.get_stream(batch_size) valid_stream = valid_dataset.get_stream(batch_size) algorithm = GradientDescent( cost=cost, parameters=cg.parameters, step_rule=Adam(learning_rate=0.001)) extensions = [Timing(), LogExtension('/home/belohlavek/ALI/mlp.log'), FinishAfter(after_n_epochs=num_epochs), DataStreamMonitoring([cost, error_rate], valid_stream, prefix="test"), TrainingDataMonitoring( [cost, error_rate, aggregation.mean(algorithm.total_gradient_norm)], prefix="train", after_epoch=True), Checkpoint(save_to), Printing()] main_loop = MainLoop(algorithm, train_stream, model=Model(cost), extensions=extensions) main_loop.run()
def test_apply_batch_normalization_nested(): x = tensor.matrix() eps = 1e-8 batch_dims = (3, 9) bn = BatchNormalization(input_dim=5, epsilon=eps) mlp = MLP([Sequence([bn.apply, Tanh().apply])], [9, 5], weights_init=Constant(0.4), biases_init=Constant(1)) mlp.initialize() y = mlp.apply(x) cg = apply_batch_normalization(ComputationGraph([y])) y_bn = cg.outputs[0] rng = numpy.random.RandomState((2016, 1, 18)) x_ = rng.uniform(size=batch_dims).astype(theano.config.floatX) y_ = y_bn.eval({x: x_}) W_, b_ = map(lambda s: (getattr(mlp.linear_transformations[0], s) .get_value(borrow=True)), ['W', 'b']) z_ = numpy.dot(x_, W_) + b_ y_expected = numpy.tanh((z_ - z_.mean(axis=0)) / numpy.sqrt(z_.var(axis=0) + eps)) assert_allclose(y_, y_expected, rtol=1e-3)
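# Follow-up sketch (assumed usage, not part of the test above): after
# apply_batch_normalization rewrites a graph for training, the population
# statistics are typically maintained by turning the collected minibatch
# estimates into extra updates for the training algorithm.
from theano import tensor
from blocks.bricks import BatchNormalizedMLP, Tanh
from blocks.graph import (ComputationGraph, apply_batch_normalization,
                          get_batch_normalization_updates)
from blocks.initialization import Constant

features = tensor.matrix('features')
bn_mlp = BatchNormalizedMLP([Tanh()], [9, 5], weights_init=Constant(0.4),
                            biases_init=Constant(0))
bn_mlp.initialize()
train_cg = apply_batch_normalization(
    ComputationGraph([bn_mlp.apply(features)]))
pop_updates = get_batch_normalization_updates(train_cg)
decay = 0.05  # illustrative decay rate for the running averages
extra_updates = [(pop, pop * (1 - decay) + mb * decay)
                 for pop, mb in pop_updates]
# These would then be attached to the algorithm,
# e.g. algorithm.add_updates(extra_updates).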
def create_model(self): x = self.x y = self.y input_dim = self.input_dim p = self.p mlp = MLP( [Rectifier(), Rectifier(), Logistic()], [input_dim, 100, 80, 1], weights_init=IsotropicGaussian(0.001), biases_init=Constant(0)) mlp.initialize() self.mlp = mlp probs = mlp.apply(x) probs.name = "score" y = y.dimshuffle(0, 'x') # Create the if-else cost function pos_ex = (y * probs) / p neg_ex = (1 - y) * (1 - probs) / np.float32(1 - p) reward = pos_ex + neg_ex cost = reward # Negative of reward cost.name = "cost" return cost, probs
def build_mlp(features_cat, features_int, labels): mlp_int = MLP(activations=[Rectifier(), Rectifier()], dims=[19, 50, 50], weights_init=IsotropicGaussian(), biases_init=Constant(0), name='mlp_interval') mlp_int.initialize() mlp_cat = MLP(activations=[Logistic()], dims=[320, 50], weights_init=IsotropicGaussian(), biases_init=Constant(0), name='mlp_categorical') mlp_cat.initialize() mlp = MLP(activations=[Rectifier(), None], dims=[50, 50, 1], weights_init=IsotropicGaussian(), biases_init=Constant(0)) mlp.initialize() gated = mlp_cat.apply(features_cat) * mlp_int.apply(features_int) prediction = mlp.apply(gated) cost = MAPECost().apply(prediction, labels) cg = ComputationGraph(cost) print cg.variables cg_dropout1 = apply_dropout(cg, [VariableFilter(roles=[OUTPUT])(cg.variables)[1], VariableFilter(roles=[OUTPUT])(cg.variables)[3]], .2) cost_dropout1 = cg_dropout1.outputs[0] return cost_dropout1, cg_dropout1.parameters, cost
def construct_model(vocab_size, embedding_dim, ngram_order, hidden_dims, activations): # Construct the model x = tensor.lmatrix('features') y = tensor.lvector('targets') lookup = LookupTable(length=vocab_size, dim=embedding_dim, name='lookup') hidden = MLP(activations=activations + [None], dims=[ngram_order * embedding_dim] + hidden_dims + [vocab_size]) embeddings = lookup.apply(x) embeddings = embeddings.flatten(ndim=2) # Concatenate embeddings activations = hidden.apply(embeddings) cost = Softmax().categorical_cross_entropy(y, activations) # Initialize parameters lookup.weights_init = IsotropicGaussian(0.001) hidden.weights_init = IsotropicGaussian(0.01) hidden.biases_init = Constant(0.001) lookup.initialize() hidden.initialize() return cost
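# Illustrative call (all hyper-parameter values below are assumptions): build
# the n-gram language-model cost from construct_model() above and compile it
# into a scoring function over (features, targets) batches.
import theano
from blocks.bricks import Rectifier
from blocks.graph import ComputationGraph

lm_cost = construct_model(vocab_size=10000, embedding_dim=64, ngram_order=4,
                          hidden_dims=[256], activations=[Rectifier()])
lm_cg = ComputationGraph([lm_cost])
score = theano.function(lm_cg.inputs, lm_cost)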
def main(save_to, num_epochs): batch_size = 128 dim = 100 n_steps = 20 i2h1 = MLP([Identity()], [784, dim], biases_init=Constant(0.), weights_init=IsotropicGaussian(.001)) h2o1 = MLP([Rectifier(), Logistic()], [dim, dim, 784], biases_init=Constant(0.), weights_init=IsotropicGaussian(.001)) rec1 = SimpleRecurrent(dim=dim, activation=Tanh(), weights_init=Orthogonal()) i2h1.initialize() h2o1.initialize() rec1.initialize() x = tensor.tensor3('features') x1 = x[1:, :, :] x2 = x[:-1, :, :] preproc = i2h1.apply(x1) h1 = rec1.apply(preproc) x_hat = h2o1.apply(h1) cost = tensor.nnet.binary_crossentropy(x_hat, x2).mean() # cost = CategoricalCrossEntropy().apply(y.flatten(), probs) cost.name = 'final_cost' cg = ComputationGraph([cost, ]) mnist_train = MNIST("train", subset=slice(0, 50000), sources=('features', )) mnist_valid = MNIST("train", subset=slice(50000, 60000), sources=('features',)) mnist_test = MNIST("test") trainstream = Mapping(Flatten(DataStream(mnist_train, iteration_scheme=SequentialScheme(50000, batch_size))), _meanize(n_steps)) validstream = Mapping(Flatten(DataStream(mnist_valid, iteration_scheme=SequentialScheme(10000, batch_size))), _meanize(n_steps)) teststream = Mapping(Flatten(DataStream(mnist_test, iteration_scheme=SequentialScheme(10000, batch_size))), _meanize(n_steps)) algorithm = GradientDescent( cost=cost, params=cg.parameters, step_rule=CompositeRule([Adam(), StepClipping(100)])) main_loop = MainLoop( algorithm, trainstream, extensions=[Timing(), FinishAfter(after_n_epochs=num_epochs), # DataStreamMonitoring( # [cost, ], # teststream, # prefix="test"), DataStreamMonitoringAndSaving( [cost, ], validstream, [i2h1, h2o1, rec1], 'best_'+save_to+'.pkl', cost_name=cost.name, after_epoch=True, prefix='valid'), TrainingDataMonitoring( [cost, aggregation.mean(algorithm.total_gradient_norm)], prefix="train", after_epoch=True), # Plot( # save_to, # channels=[ # ['test_final_cost', # 'test_misclassificationrate_apply_error_rate'], # ['train_total_gradient_norm']]), Printing()]) main_loop.run()
#step_rule = RMSProp(learning_rate=lr, decay_rate=0.95) #step_rule = Momentum(learning_rate=lr, momentum=0.9) batch_size = 1000 nvis, nhid, nlat = 784, 200, 144 theano_rng = MRG_RandomStreams(134663) # Initialize prior prior_mu = shared_floatx(numpy.zeros(nlat), name='prior_mu') prior_log_sigma = shared_floatx(numpy.zeros(nlat), name='prior_log_sigma') # Initialize encoding network encoder = MLP(activations=[Rectifier(), Identity()], dims=[nvis, nhid, nlat], weights_init=IsotropicGaussian(std=0.001), biases_init=Constant(0)) encoder.initialize() # Initialize decoding network decoder = MLP(activations=[Rectifier(), Identity()], dims=[nlat, nhid, nvis], weights_init=IsotropicGaussian(std=0.001), biases_init=Constant(0)) decoder.initialize() # Encode / decode x = tensor.matrix('features') x.tag.test_value = np.random.randn(batch_size, 784).astype("float32") # doesn't work (need to change theano flags?) #ztest = T.dot(theano.shared(np.random.randn(batch_size, batch_size)), x) #print ztest.tag.test_value.shape z = encoder.apply(x) z.name = 'z'
KL_term = -0.5 * ((1 + z1_lognu -T.exp(z1_lognu) - z1_mu ** 2).sum(axis=1) + (1 + z2_lognu -T.exp(z2_lognu) - z2_mu ** 2).sum(axis=1)) reconstruction_term = (x * T.log(decoder_p) + (1 - x) * T.log(1 - decoder_p)).sum(axis=1) cost = (KL_term -reconstruction_term).mean() cost.name = 'negative_log_likelihood' # Initialize the parameters encoder_network1._push_initialization_config() for layer in encoder_network1.linear_transformations: layer.weights_init = Uniform( width=12. / (layer.input_dim + layer.output_dim)) encoder_network1.initialize() encoder_network2._push_initialization_config() for layer in encoder_network2.linear_transformations: layer.weights_init = Uniform( width=12. / (layer.input_dim + layer.output_dim)) encoder_network2.initialize() encoder_network3._push_initialization_config() for layer in encoder_network3.linear_transformations: layer.weights_init = Uniform( width=12. / (layer.input_dim + layer.output_dim)) encoder_network3.initialize() encoder_network4._push_initialization_config() for layer in encoder_network4.linear_transformations:
x1 = data_preprocessing1(x).copy(name='x_clean') x2 = data_preprocessing2(x).copy(name='x_dirty') out1 = conv_sequence2.apply(x1) out2 = conv_sequence2.apply(x2) ### Flattening data conv_out1 = Flattener(name='flattener1').apply(out1) conv_out2 = Flattener(name='flattener2').apply(out2) conv_out = tensor.concatenate([conv_out1,conv_out2],axis=1) ### MLP mlp_hiddens = 1000 top_mlp_dims = [numpy.prod(conv_sequence1.get_dim('output'))+numpy.prod(conv_sequence1.get_dim('output'))] + [mlp_hiddens] + [output_size] top_mlp = MLP(mlp_activation, top_mlp_dims,weights_init=Uniform(width=0.2),biases_init=Constant(0.)) top_mlp.initialize() ### Getting the data from fuel.datasets.dogs_vs_cats import DogsVsCats from fuel.streams import DataStream, ServerDataStream from fuel.schemes import ShuffledScheme from fuel.transformers.image import RandomFixedSizeCrop, MinimumImageDimensions, Random2DRotation from fuel.transformers import Flatten, Cast, ScaleAndShift def create_data(data): stream = DataStream(data, iteration_scheme=ShuffledScheme(data.num_examples, batch_size))
biases_init=Constant(0.)) convnet.initialize() conv_features = Flattener().apply(convnet.apply(X)) # MLP mlp = MLP(activations=[Logistic(name='sigmoid_0'), Softmax(name='softmax_1')], dims=[256, 256, 256, 2], weights_init=IsotropicGaussian(0.01), biases_init=Constant(0)) # children: ['linear_0', 'sigmoid_0', 'linear_1', 'softmax_1'] Y = mlp.apply(conv_features) mlp.initialize() # Setting up the cost function from blocks.bricks.cost import CategoricalCrossEntropy cost = CategoricalCrossEntropy().apply(T.flatten(), Y) from blocks.roles import WEIGHT from blocks.graph import ComputationGraph from blocks.filter import VariableFilter cg = ComputationGraph(cost) print(VariableFilter(roles=[WEIGHT])(cg.variables)) W1, W2, W3 = VariableFilter(roles=[WEIGHT])(cg.variables) # cost with L2 regularization cost = cost + 0.005 * (W2 ** 2).sum() + 0.005 * (W3 ** 2).sum() cost.name = 'cost_with_regularization'