def fit(self, p_X_training, p_Y_training, p_X_validation, p_Y_validation,
        p_batchs_per_epoch=50, p_number_hidden_layers=1,
        p_number_neurons_hidden_layers=np.array([1])):
    # Assumes module-level: import numpy as np, plus the project's
    # inputlayer, hiddenlayer and outputlayer modules.
    print("Percentage of accidents: ",
          round(np.mean(p_Y_training[:, 0]) * 100, 2), "%\n")

    # Build the network: input layer, the requested hidden layers, output layer.
    self.input_layer_ = inputlayer.InputLayer(p_X_training.shape[1])
    self.hidden_layers_ = []
    for v_layer in range(p_number_hidden_layers):
        if v_layer == 0:
            # The first hidden layer is fed by the input layer.
            self.hidden_layers_.append(
                hiddenlayer.HiddenLayer(
                    p_number_neurons_hidden_layers[v_layer],
                    self.input_layer_.number_neurons))
        else:
            # Every other hidden layer is fed by the previous hidden layer.
            self.hidden_layers_.append(
                hiddenlayer.HiddenLayer(
                    p_number_neurons_hidden_layers[v_layer],
                    p_number_neurons_hidden_layers[v_layer - 1]))
    self.output_layer_ = outputlayer.OutputLayer(
        p_Y_training.shape[1],
        self.hidden_layers_[-1].number_neurons)

    # Initialize all weights from the same seed for reproducibility.
    self.input_layer_.init_w(self.random_seed)
    for v_hidden_layer in self.hidden_layers_:
        v_hidden_layer.init_w(self.random_seed)
    self.output_layer_.init_w(self.random_seed)

    randomize = np.arange(len(p_X_training))
    p_X_shuffle = p_X_training.copy()
    p_Y_shuffle = p_Y_training.copy()
    for epoch in range(self.number_iterations):
        # Reshuffle the training set at the start of every epoch.
        np.random.shuffle(randomize)
        p_X_training = p_X_shuffle[randomize]
        p_Y_training = p_Y_shuffle[randomize]
        if p_batchs_per_epoch > len(p_X_training):
            p_batchs_per_epoch = len(p_X_training)
        for batch in range(p_batchs_per_epoch):
            # Each "batch" is a single sample, i.e. online SGD.
            current_batch_X = p_X_training[batch:batch + 1, :]
            current_batch_Y = p_Y_training[batch:batch + 1, :]
            self.forward_pass(current_batch_X)
            self.backward_pass(current_batch_Y)
        self.show_progress(epoch, p_X_validation, p_Y_validation)
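# A minimal usage sketch for fit(), assuming the method belongs to a
# network class (here called "Network") whose constructor sets the
# number_iterations and random_seed attributes used above; the class
# name and constructor signature are assumptions for illustration.
import numpy as np

# X: (n_samples, n_features) floats; Y: one-hot labels, with column 0
# marking the positive ("accident") class as the print above expects.
X = np.random.rand(200, 10)
Y = np.eye(2)[np.random.randint(0, 2, size=200)]
X_train, Y_train = X[:150], Y[:150]
X_val, Y_val = X[150:], Y[150:]

net = Network(number_iterations=100, random_seed=42)  # hypothetical constructor
net.fit(X_train, Y_train, X_val, Y_val,
        p_batchs_per_epoch=50,
        p_number_hidden_layers=2,
        p_number_neurons_hidden_layers=np.array([8, 4]))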
def compile_model(self, weightMatrix=None):
    # Assumes module-level: import theano, import theano.tensor as T,
    # plus the project's HL (hidden layer) and LL (logistic regression) modules.
    x = T.vector('x')
    y = T.iscalar('y')
    params = self.hidden_layers_params[:]

    # Create the first hidden layer from the symbolic input vector x.
    n_in, n_out = params.pop(0)
    self.hidden_layers.append(HL.HiddenLayer(x, n_in, n_out))
    if weightMatrix:
        self.hidden_layers[0].setW(weightMatrix[0][0], weightMatrix[0][1])
        weightMatrix.pop(0)

    # Create the remaining hidden layers; each layer's input is the
    # previous layer's output.
    for i in range(len(params)):
        n_in, n_out = params[i]
        self.hidden_layers.append(
            HL.HiddenLayer(self.hidden_layers[-1].output, n_in, n_out))
        if weightMatrix:
            self.hidden_layers[-1].setW(weightMatrix[i][0], weightMatrix[i][1])

    # Create the logistic regression output layer.
    self.logreg_layer = LL.LogRegLayer(self.hidden_layers[-1].output,
                                       self.hidden_layers[-1].n_out,
                                       len(self.classes))
    if weightMatrix:
        self.logreg_layer.setW(weightMatrix[-1][0], weightMatrix[-1][1])

    # The cost of the network is the negative log likelihood of the
    # label plus L1 and L2 regularization terms.
    self.cost = -T.log(self.logreg_layer.output)[0, y]
    for hidden in self.hidden_layers:
        self.cost += self.L1(self.logreg_layer.W, hidden.W)
        self.cost += self.L2(self.logreg_layer.W, hidden.W)

    # Build the update list: every layer's weights and biases move one
    # SGD step based on the cost.
    updates = [(self.logreg_layer.W, self.sgd_step(self.logreg_layer.W)),
               (self.logreg_layer.b, self.sgd_step(self.logreg_layer.b))]
    updates.extend([(hidden.W, self.sgd_step(hidden.W))
                    for hidden in self.hidden_layers])
    updates.extend([(hidden.b, self.sgd_step(hidden.b))
                    for hidden in self.hidden_layers])

    # Training model: a Theano function whose inputs are a feature
    # vector and a label.
    self.train_model = theano.function(
        inputs=[x, y],
        outputs=self.cost,
        updates=updates,
    )
    # Evaluation models: a 0/1 error indicator and the predicted label.
    self.devtest_model = theano.function(
        inputs=[x, y],
        outputs=T.neq(y, T.argmax(self.logreg_layer.output[0]))
    )
    self.evaluate_model = theano.function(
        inputs=[x],
        outputs=T.argmax(self.logreg_layer.output[0])
    )
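# Hedged usage sketch for compile_model(): the wrapper class name
# ("TheanoMLP"), its constructor arguments, and the training data
# (train_X, train_y) are assumptions for illustration only.
import numpy as np

mlp = TheanoMLP(hidden_layers_params=[(50, 20), (20, 10)],
                classes=[0, 1, 2])       # hypothetical constructor
mlp.compile_model()                      # no weightMatrix: fresh random weights
for x_vec, label in zip(train_X, train_y):
    # One example per call, matching the per-sample SGD updates above.
    cost = mlp.train_model(np.asarray(x_vec, dtype='float32'), int(label))
prediction = mlp.evaluate_model(np.asarray(train_X[0], dtype='float32'))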
def __init__(self, rng, input, n_in, n_hidden, n_out):
    """Initialize the parameters for the multilayer perceptron

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type input: theano.tensor.TensorType
    :param input: symbolic variable that describes the input of the
    architecture (one minibatch)

    :type n_in: int
    :param n_in: number of input units, the dimension of the space in
    which the datapoints lie

    :type n_hidden: int
    :param n_hidden: number of hidden units

    :type n_out: int
    :param n_out: number of output units, the dimension of the space in
    which the labels lie
    """
    self.n_hidden = n_hidden

    # Since we are dealing with a one-hidden-layer MLP, this translates
    # into a HiddenLayer with a tanh activation function connected to
    # the LogisticRegression layer; the activation function can be
    # replaced by sigmoid or any other nonlinear function.
    self.hiddenLayer = hiddenlayer.HiddenLayer(rng=rng,
                                               input=input,
                                               n_in=n_in,
                                               n_out=n_hidden,
                                               activation=T.tanh)

    # The logistic regression layer gets as input the hidden units
    # of the hidden layer.
    self.logRegressionLayer = LogisticRegression(
        input=self.hiddenLayer.output,
        n_in=n_hidden,
        n_out=n_out)

    # L1 norm; one regularization option is to enforce the L1 norm to
    # be small.
    self.L1 = (abs(self.hiddenLayer.W).sum()
               + abs(self.logRegressionLayer.W).sum())

    # Square of the L2 norm; one regularization option is to enforce
    # the square of the L2 norm to be small.
    self.L2_sqr = ((self.hiddenLayer.W ** 2).sum()
                   + (self.logRegressionLayer.W ** 2).sum())

    # The negative log likelihood of the MLP is given by the negative
    # log likelihood of the output of the model, computed in the
    # logistic regression layer.
    self.negative_log_likelihood = (
        self.logRegressionLayer.negative_log_likelihood)
    # The same holds for the function computing the number of errors.
    self.errors = self.logRegressionLayer.errors

    # The parameters of the model are the parameters of the two layers
    # it is made of.
    self.params = self.hiddenLayer.params + self.logRegressionLayer.params

    # Keep track of the model input.
    self.input = input
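# A minimal construction sketch for the one-hidden-layer MLP above,
# following the standard Theano tutorial pattern; the class name "MLP"
# and the regularization coefficients are assumptions for illustration.
import numpy
import theano.tensor as T

rng = numpy.random.RandomState(1234)
x = T.matrix('x')    # one minibatch of input vectors
y = T.ivector('y')   # integer labels for the minibatch

classifier = MLP(rng=rng, input=x, n_in=28 * 28, n_hidden=500, n_out=10)
# Cost to minimize: negative log likelihood plus the L1/L2 penalties
# exposed by the class.
cost = (classifier.negative_log_likelihood(y)
        + 0.00 * classifier.L1
        + 0.0001 * classifier.L2_sqr)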
def __init__(self, rng, input, n_in, n_hidden, n_out, weight_str=[]):
    """Initialize the parameters for the multilayer perceptron

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type input: theano.tensor.TensorType
    :param input: symbolic variable that describes the input of the
    architecture (one minibatch)

    :type n_in: int
    :param n_in: number of input units, the dimension of the space in
    which the datapoints lie

    :type n_hidden: int
    :param n_hidden: number of hidden units

    :type n_out: int
    :param n_out: number of output units, the dimension of the space in
    which the labels lie

    :type weight_str: numpy.ndarray
    :param weight_str: optional flattened weight vector; when non-empty,
    element 0 is skipped, the next (n_in + 1) * n_hidden entries hold
    the first layer's weights (bias in the last row) and the remaining
    (n_hidden + 1) * n_out entries hold the output layer's weights
    """
    self.n_in = n_in
    self.n_out = n_out
    self.n_hidden = n_hidden

    # Since we are dealing with a one-hidden-layer MLP, this translates
    # into a HiddenLayer with a tanh activation function connected to
    # the LogisticRegression layer; the activation function can be
    # replaced by sigmoid or any other nonlinear function.
    if len(weight_str):
        # Unpack the flattened vector; each layer's bias sits in the
        # last row of its weight block.
        fir_weight = weight_str[1:(self.n_in + 1) * self.n_hidden + 1].reshape(
            self.n_in + 1, self.n_hidden)
        fir_weightb = fir_weight[-1, :].reshape((self.n_hidden,))
        fir_weightrest = fir_weight[:-1, :]
        sec_weight = weight_str[(self.n_in + 1) * self.n_hidden + 1:].reshape(
            (self.n_hidden + 1), self.n_out)
        sec_weightb = sec_weight[-1, :].reshape((self.n_out,))
        sec_weightrest = sec_weight[:-1, :]

        # Wrap the unpacked arrays as Theano shared variables.
        fir_W_values = numpy.asarray(fir_weightrest, dtype=theano.config.floatX)
        self.fir_weight = theano.shared(value=fir_W_values, name='fir_W', borrow=True)
        fir_b_values = numpy.asarray(fir_weightb, dtype=theano.config.floatX)
        self.fir_b = theano.shared(value=fir_b_values, name='fir_b', borrow=True)
        sec_W_values = numpy.asarray(sec_weightrest, dtype=theano.config.floatX)
        self.sec_weight = theano.shared(value=sec_W_values, name='sec_W', borrow=True)
        sec_b_values = numpy.asarray(sec_weightb, dtype=theano.config.floatX)
        self.sec_b = theano.shared(value=sec_b_values, name='sec_b', borrow=True)
        print(self.fir_weight, self.fir_b)
    else:
        # No preset weights: fall back to None so the layers initialize
        # randomly (assumes HiddenLayer and LogisticRegression accept
        # W=None and b=None, as the Theano tutorial implementations do).
        self.fir_weight = self.fir_b = None
        self.sec_weight = self.sec_b = None

    self.hiddenLayer = hiddenlayer.HiddenLayer(rng=rng,
                                               input=input,
                                               n_in=n_in,
                                               n_out=n_hidden,
                                               activation=T.tanh,
                                               W=self.fir_weight,
                                               b=self.fir_b)

    # The logistic regression layer gets as input the hidden units
    # of the hidden layer.
    self.logRegressionLayer = LogisticRegression(
        input=self.hiddenLayer.output,
        n_in=n_hidden,
        n_out=n_out,
        W=self.sec_weight,
        b=self.sec_b)

    # L1 norm; one regularization option is to enforce the L1 norm to
    # be small.
    self.L1 = (abs(self.hiddenLayer.W).sum()
               + abs(self.logRegressionLayer.W).sum())

    # Square of the L2 norm; one regularization option is to enforce
    # the square of the L2 norm to be small.
    self.L2_sqr = ((self.hiddenLayer.W ** 2).sum()
                   + (self.logRegressionLayer.W ** 2).sum())

    # The negative log likelihood of the MLP is given by the negative
    # log likelihood of the output of the model, computed in the
    # logistic regression layer.
    self.negative_log_likelihood = (
        self.logRegressionLayer.negative_log_likelihood)
    self.mean_square_error = self.logRegressionLayer.mean_square_error
    # The same holds for the function computing the number of errors.
    self.errors = self.logRegressionLayer.errors

    # The parameters of the model are the parameters of the two layers
    # it is made of.
    self.params = self.hiddenLayer.params + self.logRegressionLayer.params

    # Keep track of the model input.
    self.input = input
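# Hedged sketch of the flattened layout weight_str expects: index 0 is
# skipped, then (n_in + 1) * n_hidden entries for the first layer and
# (n_hidden + 1) * n_out entries for the output layer. The class name
# "MLP" and the sizes below are assumptions for illustration.
import numpy
import theano.tensor as T

n_in, n_hidden, n_out = 4, 3, 2
weight_str = numpy.random.randn(
    1 + (n_in + 1) * n_hidden + (n_hidden + 1) * n_out)

rng = numpy.random.RandomState(1234)
x = T.matrix('x')
classifier = MLP(rng=rng, input=x, n_in=n_in,
                 n_hidden=n_hidden, n_out=n_out, weight_str=weight_str)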