Example #1
    def add_last_layer(self, ini=Xavier(), acti=softmax()):
        n_in = self.dims[-1]
        n_out = self.classes
        layer = hidden_layer(n_in, n_out, ini, last_layer=True)
        # The activation function is softmax for the last layer
        layer.setActivation(softmax())

        # The last layer does not need dropout
        layer.setDropout(drop=0)
        if self.optimizer is not None:
            layer.setOptimizer(self.optimizer.clone())

        self.layers.append(layer)
        print('LAST LAYER with initialization: {}, '.format(ini.name),
              'activation: {}'.format(acti.name))
Example #2
    def estimate_total(self, trainX, trainY, val_X, val_Y, l2, lambd):
        numData = trainY.shape[1]
        AL, _ = self.forward_propagation(trainX)
        # For softmax
        SAL = softmax(AL)

        cost = self.compute_cost(SAL, trainY, 'cross_entropy', l2, lambd)
        prediction = np.argmax(SAL, axis=0)
        solution = np.argmax(trainY, axis=0)

        right = np.sum(prediction == solution)
        train_accuracy = right / numData

        val_accuracy = None
        if val_X is not None and val_Y is not None:
            val_AL, _ = self.forward_propagation(val_X)
            # cost = self.compute_cost(val_AL, val_Y, 'cross_entropy')

            val_pred = np.argmax(val_AL, axis=0)
            val_sol = np.argmax(val_Y, axis=0)

            val_right = np.sum(val_pred == val_sol)
            val_accuracy = val_right / val_Y.shape[1]

        return train_accuracy, val_accuracy, cost
Example #3
 def test_softmax(self):
     self.assertEqual(list(a.softmax(list(range(1, 6)))),
     [
         0.011656230956039607,
         0.03168492079612427,
         0.0861285444362687,
         0.23412165725273662,
         0.6364086465588308
     ])
Example #4
    def test_softmax():
        """Test softmax activation function"""

        x = np.array([[0, 1, 3], [-1, 0, -5], [1, 0, 3], [10, -9, -7]])

        y = np.array([[0.04201, 0.11420, 0.84379], [0.26762, 0.72747, 0.00490],
                      [0.11420, 0.04201, 0.84379], [1, 0, 0]])

        assert np.allclose(softmax(x), y, atol=0.00001)
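The two tests above pin down the expected behavior of softmax; for reference, a minimal numerically stable NumPy implementation that reproduces those values (a sketch, not the module actually under test) is:

import numpy as np

def softmax(x, axis=-1):
    # Shift by the row maximum for numerical stability, then normalize the exponentials.
    x = np.asarray(x, dtype=float)
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)

print(softmax(list(range(1, 6))))  # ~[0.0117, 0.0317, 0.0861, 0.2341, 0.6364]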
Example #5
    def predict(self, x):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']

        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)

        return y
Example #6
    def feedforward(self, inputs):
        '''
        Passes inputs forward through neural network, returns an array of
        probabilities

        Input Layer => Hidden Layer => Softmax Layer
        '''
        # feedforward inputs through hidden layer
        hidden_outputs = [node.feedforward(inputs) for node in self.hidden]

        # feedforward hidden layer outputs through softmax layer
        return softmax([node.linearsum(hidden_outputs) for node in self.soft])
Example #7
def train(train_x, train_y, learning_rate=0.2):
    # Flatten input (batch_size, 28, 28) -> (batch_size, 784)
    x = train_x.reshape(train_x.shape[0], -1)

    # Turn labels into their one-hot representations
    y = one_hot_encoder(train_y)

    # Initialize weights
    w1, b1 = initialize_weight((784, 256), bias=True)
    w2, b2 = initialize_weight((256, 10), bias=True)

    num_epochs = 50
    loss_history = []
    for epoch in range(1, num_epochs + 1):
        print("Epoch {}/{}\n===============".format(epoch, num_epochs))

        # Forward Prop
        h1 = np.dot(x, w1) + b1
        a1 = sigmoid(h1)
        h2 = np.dot(a1, w2) + b2
        a2 = softmax(h2)
        out = a2

        # Cross Entropy Loss
        loss = cross_entropy_loss(out, train_y)
        loss_history.append(loss)
        print("Loss: {:.6f}".format(loss))

        # Compute and print accuracy
        pred = np.argmax(out, axis=1)
        pred = pred.reshape(pred.shape[0], 1)
        acc = np.mean(pred == train_y)
        print("Accuracy: {:.2f}%\n".format(acc * 100))

        # Backward Prop
        m = out.shape[0]
        dh2 = a2 - y
        dw2 = (1 / m) * np.dot(a1.T, dh2)
        db2 = (1 / m) * np.sum(dh2, axis=0, keepdims=True)

        dh1 = np.dot(dh2, w2.T) * sigmoid_prime(a1)
        dw1 = (1 / m) * np.dot(x.T, dh1)
        db1 = (1 / m) * np.sum(dh1, axis=0, keepdims=True)

        # Weight (and bias) update
        w1 -= learning_rate * dw1
        b1 -= learning_rate * db1
        w2 -= learning_rate * dw2
        b2 -= learning_rate * db2

    return w1, b1, w2, b2, loss_history
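train relies on helper functions that the snippet does not show; minimal stand-ins consistent with the shapes used above (assumptions, not the original project's code) might look like:

import numpy as np

def one_hot_encoder(labels, num_classes=10):
    # (batch_size, 1) or (batch_size,) integer class ids -> (batch_size, num_classes) one-hot matrix
    labels = np.asarray(labels).reshape(-1).astype(int)
    encoded = np.zeros((labels.shape[0], num_classes))
    encoded[np.arange(labels.shape[0]), labels] = 1
    return encoded

def initialize_weight(shape, bias=False):
    # Small random weights; the optional bias is a zero row vector matching the layer width.
    w = np.random.randn(*shape) * 0.01
    if bias:
        return w, np.zeros((1, shape[1]))
    return w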
Example #8
    def hessian(self, x, t):
        k = t.shape[1]
        n = t.shape[0]
        d = x.shape[1]

        w = np.reshape(self.w, (x.shape[1], -1), 'F')
        y = softmax(np.dot(x, w))
        
        h = np.zeros([d*k, d*k])
        for i in range(k):
            for j in range(k):
                h[i*d:(i+1)*d, j*d:(j+1)*d] = np.dot(
                    np.transpose(x) * (y[:, i] * ((i == j) - y[:, j])), x)

        return h
Example #9
    def __init__(self, input, n_in, n_out, W=None, b=None):
        if W is None:
            W = theano.shared(np.random.randn(n_out, n_in).astype(dtype=theano.config.floatX)/np.sqrt(n_in))
        if b is None:
            b = theano.shared(np.random.randn(n_out).astype(dtype=theano.config.floatX))
        self.W = W
        self.b = b
        self.v_W = theano.shared(np.zeros((n_out, n_in)).astype(dtype=theano.config.floatX))
        self.v_b = theano.shared(np.zeros(n_out).astype(dtype=theano.config.floatX))

        self.y = a.softmax( T.dot(W, input) + b.dimshuffle(0, 'x'))
        self.y_pred = T.argmax(self.y, axis=0)
        self.params = [self.W, self.b]
        self.velo = [self.v_W, self.v_b]
        self.input = input
Example #10
 def feedForward(self, inputs):
     if len(inputs) != self.input-1:
         raise ValueError('Wrong number of inputs')
     
     # input activations
     self.ai = np.append(inputs, [1]) # add bias node
     
     # hidden activations
     self.ah = sigmoid(self.ai.dot(self.wi))
     # self.ah = relu(self.ai.dot(self.wi))
     
     # output activations
     self.ao = sigmoid(self.ah.dot(self.wo))
     # self.ao = relu(self.ah.dot(self.wo))
     
     return softmax(self.ao)
Example #11
    def __init__(self, input_list, n_in, n_out, n_total, mask, batch, W=None, b=None, M=None):
        w = np.zeros((n_in, n_out))
        np.fill_diagonal(w, 1)
        if W is None:
            #W = theano.shared(np.random.randn(n_in, n_out).astype(dtype=theano.config.floatX)/np.sqrt(n_in))
            W = theano.shared(w.astype(dtype=theano.config.floatX)/np.sqrt(n_in))
        if b is None:
            b = theano.shared(np.zeros(n_out).astype(dtype=theano.config.floatX))
        if M is None:
            M = theano.shared(0.5 * np.ones((n_total, 2)).astype(dtype=theano.config.floatX))
        self.W = W
        self.b = b
        self.M = M
        self.v_W = theano.shared(np.zeros((n_in, n_out)).astype(dtype=theano.config.floatX))
        self.v_b = theano.shared(np.zeros(n_out).astype(dtype=theano.config.floatX))
        self.v_M = theano.shared(np.zeros((n_total, 2)).astype(dtype=theano.config.floatX))
        self.input_list = input_list
        self.input_list[0] = self.input_list[0]
        self.input_list[1] = (self.input_list[1])[::-1]
        '''
        def Merge(input_seq1, input_seq2, merger):
            return T.dot((input_seq1 * merger[0] + input_seq2 * merger[1]), self.W) + self.b
        self.temp_y = a.softmax((theano.scan(Merge,
            sequences=[self.input_list[0], self.input_list[1], self.M],
                outputs_info=None))[0])
        '''

        def Merge(input_seq1, input_seq2):
            return T.dot((input_seq1 * 1 + input_seq2 * 0), self.W) + self.b

        self.temp_y = a.softmax((theano.scan(Merge,
            sequences=[self.input_list[0], self.input_list[1]],
                outputs_info=None))[0])


        self.temp_y = self.temp_y.dimshuffle(1,0,2)
        self.mask = mask
        self.batch = batch
        y_pred_list = []
        for i in range(self.batch):
            y_pred_list.append(T.set_subtensor(T.argmax(self.temp_y[i], axis=1)[self.mask[i]:], 0))
        self.y_pred = T.stacklists(y_pred_list)

        self.params = [self.W, self.b, self.M]
        self.velo = [self.v_W, self.v_b, self.v_M]
Example #12
def visualization(test_x, test_y, w1, b1, w2, b2):
    x = test_x[:20]
    x = x.reshape(x.shape[0], -1)
    y = test_y[:20]

    # Forward Pass
    h1 = np.dot(x, w1) + b1
    a1 = sigmoid(h1)
    h2 = np.dot(a1, w2) + b2
    a2 = softmax(h2)
    out = a2
    pred = np.argmax(out, axis=1)

    fig = plt.figure(figsize=(25, 4))
    for index in np.arange(20):
        ax = fig.add_subplot(2, 20 // 2, index + 1, xticks=[], yticks=[])
        ax.imshow(test_x[index], cmap='gray')
        ax.set_title("{} ({})".format(str(pred[index]), str(y[index][0])),
                     color=("green" if pred[index] == y[index] else "red"))
Example #13
    def __init__(self, input, n_in, n_out, W=None, b=None):
        if W is None:
            W = theano.shared(
                np.random.randn(n_out, n_in).astype(dtype=theano.config.floatX)
                / np.sqrt(n_in))
        if b is None:
            b = theano.shared(
                np.random.randn(n_out).astype(dtype=theano.config.floatX))
        self.W = W
        self.b = b
        self.v_W = theano.shared(
            np.zeros((n_out, n_in)).astype(dtype=theano.config.floatX))
        self.v_b = theano.shared(
            np.zeros(n_out).astype(dtype=theano.config.floatX))

        self.y = a.softmax(T.dot(W, input) + b.dimshuffle(0, 'x'))
        self.y_pred = T.argmax(self.y, axis=0)
        self.params = [self.W, self.b]
        self.velo = [self.v_W, self.v_b]
        self.input = input
Example #14
 def batch_train(self, data, label):
     """ Batch Training 
     X[i, j, k]: input for layer i
         size of X: nLayer x nNeuron_i x nSample
     D[i, :]: delta for layer i (except input layer)
         size of D: nLayer-1 x nNeuron_i x nSample
     Fd[i, :]: derivative of activation of layer i 
         (except input layer and output layer)
         size of Fd: nLayer-2 x nNeuron_i x nSample
     """
     n_samples = data.shape[1]
     # Add bias unit to input layer
     bias = np.ones((1, n_samples))
     X = [np.concatenate((data, bias), axis=0)]
     Fd = []
     # Forward
     for i in range(self.nLayer - 2):
         si = np.dot(self.W[i].T, X[i])
         xi = self.activation(si)
         xi_deriv = self.activation(si, 1)
         xi = np.concatenate((xi, bias), axis=0)
         X.append(xi)
         Fd.append(xi_deriv)
     so = np.dot(self.W[-1].T, X[-1])
     xo = act.softmax(so)
     # Backpropagation
     o_delta = xo
     o_delta[label, np.arange(n_samples)] -= 1
     D = [o_delta]
     for i in range(self.nLayer - 3, -1, -1):
         delta = self.W[i + 1][0:-1, :].dot(D[-1])
         delta = np.multiply(delta, Fd[i])
         D.append(delta)
     D.reverse()
     # Update weight
     for i in range(self.nLayer - 1):
         self.W[i] += (-self.lr * X[i].dot(D[i].T))
     #Release Memory
     D.clear()
     Fd.clear()
     X.clear()
Example #15
def single_layer_fp(X, W, b, activation="sigmoid"):
    ones = np.ones(X.shape[1])
    A = np.dot(W, X) + np.outer(b, ones)
    if activation == "linear":
        S = act_fun.linear(A)
    elif activation == "sigmoid":
        S = act_fun.sigmoid(beta, A)
    elif activation == "tanh":
        S = act_fun.tanh(beta, A)
    elif activation == "relu":
        S = act_fun.relu(A)
    elif activation == "softplus":
        S = act_fun.softplus(A)
    elif activation == "elu":
        S = act_fun.elu(delta, A)
    elif activation == "softmax":
        S = act_fun.softmax(A)
    else:
        raise ValueError("Activation function isn't supported")
    return (A, S)
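A hypothetical call to single_layer_fp, with shapes assumed (samples stored as columns) and the project's act_fun module importable:

import numpy as np

X = np.random.randn(4, 10)            # 4 features, 10 samples (one per column)
W = np.random.randn(3, 4) * 0.1       # layer of 3 units
b = np.zeros(3)
A, S = single_layer_fp(X, W, b, activation="softmax")
# A holds the pre-activations W X + b; S holds the softmax outputs,
# so each column of S should sum to 1 if act_fun.softmax normalizes over axis 0.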
Example #16
    def test_softmax(self):
        def _ref_softmax(values):
            """
                Taken from Keras' testing code:
                https://github.com/keras-team/keras/blob/ce5728bbd36004c7a17b86e69a8e59b21d6ee6d4/keras/activations_test.py
            """
            m = np.max(values)
            e = np.exp(values - m)
            return e / np.sum(e)

        rtol = 1e-3
        size = 10
        for _ in range(1000):
            x = np.random.uniform(low=-1., high=1., size=size).flatten()
            y_numpy = _ref_softmax(x)
            test_buffer = list_2_swig_float_pointer(x, size)
            y_nn4mc = activation.softmax(test_buffer.cast(), size)
            y_nn4mc = swig_py_object_2_list(y_nn4mc, size)
            y_nn4mc = np.round(y_nn4mc, decimals=5)
            y_numpy = np.round(y_numpy, decimals=5)
            assert np.allclose(y_nn4mc, y_numpy, rtol=rtol)
        print("softmax passed")
Example #17
def test(test_x, test_y, w1, b1, w2, b2):
    # Flatten input (batch_size, 28, 28) -> (batch_size, 784)
    x = test_x.reshape(test_x.shape[0], -1)

    # Turn labels into their one-hot representations
    y = one_hot_encoder(test_y)

    # Forward Pass
    h1 = np.dot(x, w1) + b1
    a1 = sigmoid(h1)
    h2 = np.dot(a1, w2) + b2
    a2 = softmax(h2)
    out = a2

    # Cross Entropy Loss
    loss = cross_entropy_loss(out, test_y)
    print("Loss: {:.6f}".format(loss))

    # Compute and print accuracy
    pred = np.argmax(out, axis=1)
    pred = pred.reshape(pred.shape[0], 1)
    acc = np.mean(pred == test_y)
    print("Accuracy: {:.2f}%\n".format(acc * 100))
Example #18
	def train(self, images, labels):
		"""
		Train method

		This method takes a set of images and labels, then feeds
		them to the neural network, which then backpropagates with its outputs.

		It breaks when it reaches its minimum error and returns the
		weights used to achieve this error.

		@param images | list | an array of images
		@param labels | list | an array of labels
		"""
		labels = list(labels)
		if not isinstance(labels[0], list):
			for i, x in enumerate(labels):
				a = [0 for x in range(10)]
				a[x] = 1
				labels[i] = a

		print("minimising images 2")
		# Rebind the list so the softmax-normalized images are actually used below
		images = [softmax(image) for image in images]
		print("done")

		# n = 20
		while True:
			# if n > 0:
			# 	n -= 1
			for i in range(len(images)):
				image, label = images[i], labels[i]
			# error = self.error
				outputs = self.execute(image, label)
			# if self.error > error and n == 0:
				# print(f"Min error: {self.error}")
				# return self.save_weights()
				self.backpropogate(outputs, label)
Example #19
    def predict(self, X, y=None):
        """Preditc Label for X"""

        p_label = np.empty((0, 0))
        n_samples = X.shape[1]
        n_batch = np.ceil(n_samples / self.batchSize)
        loss = 0
        for i in range(np.uint16(n_batch)):
            end_batch = n_samples if (
                i + 1) * self.batchSize >= n_samples else (i +
                                                           1) * self.batchSize
            cur_batch = end_batch - i * self.batchSize
            Xb = X[:, i * self.batchSize:end_batch]
            # Add bias
            bias = np.ones((1, cur_batch))
            Xb = np.concatenate((Xb, bias), axis=0)
            # Forward
            for k in range(self.nLayer - 2):
                sk = np.dot(self.W[k].T, Xb)
                Xb = self.activation(sk)
                Xb = np.concatenate((Xb, bias), axis=0)
            so = np.dot(self.W[-1].T, Xb)
            Xb = act.softmax(so)
            if y is not None:
                loss += self.loss_function(Xb, y[i * self.batchSize:end_batch])
            p_label = np.append(p_label, np.argmax(Xb, axis=0))
        loss = loss / n_samples

        if y is not None:
            cnt = np.sum(y == p_label)
            correct = (cnt * 10000 // len(y)) / 100
            print("Loss, Correct: ", loss, correct)
            return loss, correct
        else:
            print("Label: ", p_label)
            return None, None
Example #20
 def forward(self, x, t):
     self.t = t
     self.y = softmax(x)
     self.loss = cross_entropy_error(self.y, self.t)
     return self.loss
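The cross_entropy_error helper used here is not part of the snippet; a common definition consistent with this usage (an assumption) is:

import numpy as np

def cross_entropy_error(y, t):
    # y: predicted probabilities, t: one-hot targets of the same shape.
    # The small epsilon keeps log() finite when a probability underflows to 0.
    if y.ndim == 1:
        y = y.reshape(1, -1)
        t = t.reshape(1, -1)
    batch_size = y.shape[0]
    return -np.sum(t * np.log(y + 1e-7)) / batch_size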
Example #21
    def __init__(self,
                 input_list,
                 n_in,
                 n_out,
                 n_total,
                 mask,
                 batch,
                 W=None,
                 b=None,
                 M=None):
        w = np.zeros((n_in, n_out))
        np.fill_diagonal(w, 1)
        if W is None:
            #W = theano.shared(np.random.randn(n_in, n_out).astype(dtype=theano.config.floatX)/np.sqrt(n_in))
            W = theano.shared(
                w.astype(dtype=theano.config.floatX) / np.sqrt(n_in))
        if b is None:
            b = theano.shared(
                np.zeros(n_out).astype(dtype=theano.config.floatX))
        if M is None:
            M = theano.shared(0.5 * np.ones(
                (n_total, 2)).astype(dtype=theano.config.floatX))
        self.W = W
        self.b = b
        self.M = M
        self.v_W = theano.shared(
            np.zeros((n_in, n_out)).astype(dtype=theano.config.floatX))
        self.v_b = theano.shared(
            np.zeros(n_out).astype(dtype=theano.config.floatX))
        self.v_M = theano.shared(
            np.zeros((n_total, 2)).astype(dtype=theano.config.floatX))
        self.input_list = input_list
        self.input_list[0] = self.input_list[0]
        self.input_list[1] = (self.input_list[1])[::-1]
        '''
        def Merge(input_seq1, input_seq2, merger):
            return T.dot((input_seq1 * merger[0] + input_seq2 * merger[1]), self.W) + self.b
        self.temp_y = a.softmax((theano.scan(Merge,
            sequences=[self.input_list[0], self.input_list[1], self.M],
                outputs_info=None))[0])
        '''
        def Merge(input_seq1, input_seq2):
            return T.dot((input_seq1 * 1 + input_seq2 * 0), self.W) + self.b

        self.temp_y = a.softmax(
            (theano.scan(Merge,
                         sequences=[self.input_list[0], self.input_list[1]],
                         outputs_info=None))[0])

        self.temp_y = self.temp_y.dimshuffle(1, 0, 2)
        self.mask = mask
        self.batch = batch
        y_pred_list = []
        for i in range(self.batch):
            y_pred_list.append(
                T.set_subtensor(
                    T.argmax(self.temp_y[i], axis=1)[self.mask[i]:], 0))
        self.y_pred = T.stacklists(y_pred_list)

        self.params = [self.W, self.b, self.M]
        self.velo = [self.v_W, self.v_b, self.v_M]
Example #22
    def train_model(self):

        epoch = config['TRAIN']['epoch']
        batch_size = config['TRAIN']['batch_size']
        train_data_ratio = config['TRAIN']['train_data_ratio']
        validation_data_ratio = config['TRAIN']['validation_data_ratio']
        learning_rate = config['TRAIN']['learning_rate']
        optimizer = config['TRAIN']['optimizer']
        l2 = config['TRAIN']['L2']
        lambd = config['TRAIN']['lambd']
        train_acc_list = []
        val_acc_list = []
        cost_list = []
        val_cost_list = []

        numTrain = int(self.trainX.shape[1] * train_data_ratio)
        numVal = int(self.trainX.shape[1] - numTrain)

        trainX = self.trainX[:, 0:numTrain]
        trainY = self.trainY[:, 0:numTrain]
        val_X = self.trainX[:, numTrain:]
        val_Y = self.trainY[:, numTrain:]

        numBatch = numTrain // batch_size

        print("Number of Training Data: " + str(trainX.shape[1]))
        print("Number of Validation Data: " + str(val_X.shape[1]))
        if l2 == "true":
            l2 = True
        else:
            l2 = False

        for i in range(epoch):
            for j in range(numBatch):
                batch_X = trainX[:, j * batch_size:(j + 1) * batch_size]
                batch_Y = trainY[:, j * batch_size:(j + 1) * batch_size]

                AL, caches = self.forward_propagation(batch_X)
                # For softmax
                SAL = softmax(AL)

                cost = self.compute_cost(AL, batch_Y, 'cross_entropy', l2,
                                         lambd)
                print('Epoch - ' + str(i) + ' Mini-batch - ' + str(j) +
                      ' cost ' + str(cost))

                grads = self.backward_propagation(SAL, batch_Y, caches, l2,
                                                  lambd)
                self.update_parameters(self.parameters, grads, learning_rate,
                                       optimizer)

                train_acc, val_acc, val_cost = self.estimate(
                    AL, batch_Y, val_X, val_Y, l2, lambd)
                train_acc_list.append(train_acc)
                val_acc_list.append(val_acc)
                cost_list.append(cost)
                val_cost_list.append(val_cost)
                print('train_accuracy: ' + str(train_acc))
                if val_acc is not None:
                    print('val_accuracy: ' + str(val_acc))

            # Last batch
            if numTrain % batch_size != 0:
                batch_X = trainX[:, numBatch * batch_size:]
                batch_Y = trainY[:, numBatch * batch_size:]
                # print (batch_X.shape)
                AL, caches = self.forward_propagation(batch_X)
                # For softmax
                SAL = softmax(AL)

                cost = self.compute_cost(AL, batch_Y, 'cross_entropy', l2,
                                         lambd)
                print('Epoch - ' + str(i) + ' Mini-batch - ' + str(j) +
                      ' cost ' + str(cost))

                grads = self.backward_propagation(SAL, batch_Y, caches, l2,
                                                  lambd)
                self.update_parameters(self.parameters, grads, learning_rate,
                                       optimizer)
                train_acc, val_acc, val_cost = self.estimate(
                    AL, batch_Y, val_X, val_Y, l2, lambd)
                train_acc_list.append(train_acc)
                val_acc_list.append(val_acc)
                cost_list.append(cost)
                val_cost_list.append(val_cost)
                print('train_accuracy: ' + str(train_acc))
                if val_acc is not None:
                    print('val_accuracy: ' + str(val_acc))

            if i % 1 == 0:
                """
                print (AL[:, 0])
                print (SAL[:, 0])
                print (batch_Y[:, 0])
                """
                # print (self.parameters["W2"])
                # train_acc, val_acc, cost = self.estimate_total(trainX, trainY, val_X, val_Y, l2, lambd)
                # train_acc_list.append(train_acc)
                # val_acc_list.append(val_acc)
                # cost_list.append(cost)

            if i % 10 == 0:
                pass
                """
                print ('Epoch: ' + str(i) + '-' + ' cost ' + str(cost))
                print ('train_accuracy: ' + str(train_acc))
                if val_acc is not None:
                    print ('val_accuracy: ' + str(val_acc))
                """

            # train_acc, val_acc = self.estimate(AL, batch_Y, val_X, val_Y)
            #print ('train_accuracy: ' + str(train_acc))
            #if val_acc is not None:
            #    print ('val_accuracy: ' + str(val_acc))

            # break
            # W1 = self.parameters["W1"]
            # print (W1.shape)
            # print (W1[0].shape)

        return train_acc_list, val_acc_list, cost_list, val_cost_list
Example #23
 def forward_softmax(self, tree):
     Z = np.dot(tree.p, self.Ws) + self.bs
     tree.softmax = act.softmax(Z)
Example #24
def predict(nn, x):
    return softmax(forward(nn, x))
Example #25
 def gradient(self, node, grad):
     grad_A = (activation.softmax(node.parents[0]) +
               -1 * node.parents[1]) * grad
     grad_B = op.zeros_like(node.parents[1])
     return [grad_A, grad_B]
Example #26
 def forward_prop(self, X):
     if self.layer_type == "output":
         return softmax(np.dot(self.W.T, X))
     else:
         return relu(np.dot(self.W.T, X))
Example #27
def main(argv):

    # load and pre-process the data
    X, Predict_data, Y = preprocessed.data_preprocess(
        parameter.input_data_path)
    print('| Total train data | structure: {}'.format(X.shape))
    print('| Train Data label | structure: {}'.format(Y.shape))
    print('| Total test Data  | structure: {}'.format(Predict_data.shape))

    # split data into train, validation and test
    train_x, train_y, vali_x, vali_y, test_x, test_y = preprocessed.train_vali_test_split(
        X, Y, parameter.train_rate, parameter.vali_rate, parameter.test_rate)
    print("_______________________________________")
    print('after split\ntrain data shape:\t{}'.format(train_x.shape))
    print('train data label:\t{}'.format(train_y.shape))
    if vali_x is None:
        print(" after data pre-process, validation is none")
    else:
        print('validation data shape:\t{}'.format(vali_x.shape))
    if test_x is None:
        print(" after data pre-process, test data is none")
    else:
        print('test data shape:\t{}'.format(test_x.shape))
    print("_______________________________________")

    # create learning model
    # a model considers batch size, batch normalization, dropout rate, weight decay and way of optimization
    learn_model = model(train_x,
                        train_y,
                        batch_size=get_batch_size(),
                        drop=get_dropout_rate(),
                        learning_rate=get_lr(),
                        regularizer=get_regularizer(),
                        norm=get_norm(),
                        optimizer=get_opt())
    # set validation data into model
    learn_model.validation(vali_x, vali_y)

    # create neural layer1
    learn_model.add_layer(parameter.num_hide1, ini=He(), acti=relu())
    # layer2
    learn_model.add_layer(parameter.num_hide2, ini=He(), acti=relu())
    # layer3
    learn_model.add_layer(parameter.num_hide3, ini=He(), acti=relu())
    # layer4
    learn_model.add_last_layer(ini=Xavier(), acti=softmax())

    # start training
    x_rem = learn_model.fit(epoch=parameter.epoch,
                            learning_rate=parameter.learning_rate)

    # start testing
    learn_model.test(test_x, test_y)

    # plot result
    learn_model.plot(x_rem, True, True)

    # start predict
    print("----------  finish predict, save to predict.h5  ----------")
    predict = learn_model.predict(x=Predict_data).T
    predict = np.argmax(predict, axis=1)
    # print(predict)

    f = h5py.File(parameter.ouput_data_path + "/Predicted_labels.h5", 'a')
    f.create_dataset('/predict', data=predict, dtype=np.float32)
    f.close()
Example #28
File: rnn.py  Project: giahy2507/rnn
 def forward_softmax(self, root):
     Z = np.dot(root.p, self.Ws) + self.bs
     A = act.softmax(Z)
     return A
Example #29
import matplotlib.pyplot as plt
import numpy as np

def drawLinePlot(start, end, activationFunction):
    inputs = []
    outputs = []
    for x in np.arange(start, end, 0.5):
        inputs.append(x)
        outputs.append(activationFunction(x))
    return inputs, outputs

x, y = drawLinePlot(-3, 3, activation.linear)
plt.plot(x, y, label='Linear')
x, y = drawLinePlot(-3, 3, activation.sigmoid)
plt.plot(x, y, label='Sigmoid')
x, y = drawLinePlot(-3, 3, activation.leakyRelu)
plt.plot(x, y, label='Leaky ReLU')
x, y = drawLinePlot(-3, 3, activation.tanh)
plt.plot(x, y, label='Tanh')
x, y = drawLinePlot(-3, 3, activation.relu)
plt.plot(x, y, label='ReLU')
x, y = drawLinePlot(-3, 3, activation.swish)
plt.plot(x, y, label='Swish')
plt.plot([-3, -2, -1, 0, 1, 2, 3], activation.softmax([-3, -2, -1, 0, 1, 2, 3]), label='Softmax')

plt.xlabel("Inputs")
plt.ylabel("Outputs")
plt.title("Activation Functions")
plt.legend()
plt.show()
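The plotting script imports an activation module that is not shown; element-wise stand-ins consistent with how drawLinePlot calls them (assumptions, not the project's actual definitions) could be:

# contents of a hypothetical activation.py
import numpy as np

def linear(x):
    return x

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def tanh(x):
    return np.tanh(x)

def relu(x):
    return np.maximum(0.0, x)

def leakyRelu(x, alpha=0.01):
    return np.where(x > 0, x, alpha * x)

def swish(x):
    return x * sigmoid(x)

def softmax(xs):
    # Normalize a whole vector of inputs into probabilities.
    exps = np.exp(np.asarray(xs, dtype=float) - np.max(xs))
    return exps / exps.sum()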
Example #30
def construct_RNN(n_input,
                  n_output,
                  n_hid_layers=2,
                  archi=128,
                  lr=1e-3,
                  acti_func='ReLU',
                  update_by='RMSProp',
                  dropout_rate=0.2,
                  batchsize=1,
                  scale=0.033,
                  scale_b=0.001,
                  clip_thres=10.0,
                  seed=42):
    """
    Initialize and construct the bidirectional deep RNN with dropout
    Update the RNN using minibatch and RMSProp
    archi: number of neurons of each hidden layer
    """
    x_seq = T.fmatrix()
    y_hat = T.fmatrix()
    minibatch = T.scalar()
    stop_dropout = T.scalar()

    # choose the optimization function
    optimiz_func = {
        'sgd': sgd,
        'momentum': momentum,
        'NAG': NAG,
        'RMSProp': RMSProp,
    }
    update_func = optimiz_func[update_by]

    # initialize the RNN
    print('Start initializing RNN...')
    init = initialize_RNN(n_input, n_output, archi, n_hid_layers, scale,
                          scale_b, clip_thres)
    param_Ws, param_bs, auxis, caches, a_0, parameters = init

    # ############ bidirectional recurrent neural network ###############
    srng = RandomStreams(seed=seed)

    # #### Hidden layers ######
    for l in range(n_hid_layers):
        if l == 0:
            a_seq = x_seq
            z_seq = T.dot(a_seq, param_Ws[0][l])
            z_seq += param_bs[0][l].dimshuffle('x', 0)
            zf_seq = z_seq
            zb_seq = z_seq
        else:
            zf_seq = T.dot(a_seq, param_Ws[1][l - 1])
            zf_seq += param_bs[1][l - 1].dimshuffle('x', 0)
            zb_seq = T.dot(a_seq, param_Ws[2][l - 1])
            zb_seq += param_bs[2][l - 1].dimshuffle('x', 0)

        step = set_step(param_Ws[3], param_bs[3], l, acti_func)
        [af_seq, ab_seq], _ = th.scan(step,
                                      sequences=[zf_seq, zb_seq[::-1]],
                                      outputs_info=[a_0, a_0],
                                      truncate_gradient=-1)

        a_out = T.concatenate([af_seq, ab_seq[::-1]], axis=1)
        dropping = srng.binomial(size=T.shape(a_out), p=(1 - dropout_rate))
        a_seq = ifelse(T.lt(stop_dropout, 1.05),
                       (a_out * dropping).astype('float32'), a_out)
        a_seq /= stop_dropout

    # #### End of Hidden layers ######

    y_pre = T.dot(a_seq, param_Ws[0][1]) + param_bs[0][1].dimshuffle('x', 0)
    y_seq = softmax(y_pre)
    forward = th.function(inputs=[x_seq, stop_dropout], outputs=y_seq)

    cost = T.sum((y_seq - y_hat)**2) + minibatch * 0
    valid = th.function(inputs=[x_seq, y_hat, minibatch, stop_dropout],
                        outputs=cost)
    grads = T.grad(cost, parameters, disconnected_inputs='ignore')

    # ############ end of construction ###############

    updates = update_func(parameters, grads, lr, minibatch, batchsize, auxis,
                          caches)
    rnn_train = th.function(inputs=[x_seq, y_hat, minibatch, stop_dropout],
                            outputs=cost,
                            updates=updates)

    return forward, valid, rnn_train
Example #31
 def test_softmax(self):
     self.assertEqual(
         activation.softmax(softmax_inp_x).any(), softmax_out_x.any())
Example #32
    def train(self, data, yTrues, learnRate=0.1, epochs=1000, checkRate=50):
        '''
        Uses backpropagation to calculate the partial derivatives (gradients)
        of Loss with respect to each weight and bias, then uses Stochastic
        Gradient Descent (SGD) to adjust each weight and bias such that:

            w <= w - lr*dLdw where: w is a weight or bias,
                                    lr is the learn rate (typically 0.1)
                                    dLdw is the partial derivative of the Loss
                                    with respect to that weight or bias

        Each epoch represents one run through the entire dataset, and after
        every 50 epochs (or however many is set by the check rate) the program
        will run a feedforward and print the Epoch number, the Cross Entropy
        Loss and the Accuracy (percent of correct guesses)
        '''
        # a runthrough of the entire data set
        for epoch in range(epochs):
            # a runthrough of one row of data
            for x, yTrue in zip(data, yTrues):
                # execute a feedforward, storing linear sums
                # (results of linear function in neuron)
                hidden_outputs = []
                hidden_totals = []
                for node in self.hidden:
                    hidden_totals.append(node.linearsum(x))
                    hidden_outputs.append(node.feedforward(x))

                soft_totals = [
                    node.linearsum(hidden_outputs) for node in self.soft
                ]
                soft_outputs = softmax(soft_totals)

                # partial derivatives
                # partial L / partial softout for case c (yTrue)
                dL_dsoc = soft_outputs[yTrue] - 1

                # Update softmax layer
                j = 0  # counter for index of current neuron in self.soft
                for node in self.soft:
                    # partial softout for case c / partial total
                    dsoc_dt = d_softmax(yTrue, j, soft_totals)

                    # Update weights
                    for w in range(len(node.weights)):
                        # partial total / partial weight
                        dt_dw = hidden_outputs[w]
                        # partial loss / partial weight
                        partial_derivative = dL_dsoc * dsoc_dt * dt_dw
                        # SGD
                        newweight = (node.weights[w] -
                                     learnRate * partial_derivative)
                        # update weight
                        node.changeweight(newweight, w)

                    # Update biases
                    # partial loss / partial bias
                    partial_derivative = dL_dsoc * dsoc_dt
                    # SGD
                    newbias = node.bias - learnRate * partial_derivative
                    # update bias
                    node.changebias(newbias)
                    # increment counter
                    j += 1
                    # counter for index of current neuron in self.hidden
                    h = 0

                    for hnode in self.hidden:
                        # partial total(softmax layer) / partial h
                        dt_dh = node.weights[h]

                        for w in range(len(hnode.weights)):
                            # partial h / partial w
                            dh_dw = x[w] * activation_derivative(
                                self.hiddenActivation, hidden_totals[h])
                            # partial loss / partial w
                            partial_derivative = (dL_dsoc * dsoc_dt * dt_dh *
                                                  dh_dw)
                            # SGD
                            newweight = (hnode.weights[w] -
                                         (learnRate * partial_derivative))
                            # update weight
                            hnode.changeweight(newweight, w)

                        # partial h / partial bias
                        dh_db = activation_derivative(self.hiddenActivation,
                                                      hidden_totals[h])
                        # partial loss / partial bias
                        partial_derivative = dL_dsoc * dsoc_dt * dt_dh * dh_db
                        # SGD
                        newbias = hnode.bias - learnRate * partial_derivative
                        # update bias
                        hnode.changebias(newbias)
                        # increment counter
                        h += 1

            # Run a feedforward on the data and print an update to the console
            # with the epoch, avg. loss, accuracy
            if epoch % checkRate == 0:
                self.test(data, yTrues, True, epoch)
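The training loop above calls d_softmax, which the snippet does not define; a definition consistent with how it is used, i.e. the derivative of the softmax output for class c with respect to input total j (an assumption), is:

def d_softmax(c, j, totals):
    # Derivative of the softmax output for class c with respect to input total j:
    # s_c * (1 - s_c) when j == c, and -s_c * s_j otherwise.
    s = softmax(totals)
    return s[c] * (1 - s[c]) if j == c else -s[c] * s[j]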
Example #33
def construct_RNN(n_input, n_output, n_hid_layers=2, archi=128, lr=1e-3,
                  acti_func='ReLU', update_by='RMSProp', dropout_rate=0.2,
                  batchsize=1, scale=0.033, scale_b=0.001, clip_thres=10.0,
                  seed=42):
    """
    Initialize and construct the bidirectional deep RNN with dropout
    Update the RNN using minibatch and RMSProp
    archi: number of neurons of each hidden layer
    """
    x_seq = T.fmatrix()
    y_hat = T.fmatrix()
    minibatch = T.scalar()
    stop_dropout = T.scalar()

    # choose the optimization function
    optimiz_func = {
        'sgd': sgd,
        'momentum': momentum,
        'NAG': NAG,
        'RMSProp': RMSProp,
    }
    update_func = optimiz_func[update_by]

    # initialize the RNN
    print('Start initializing RNN...')
    init = initialize_RNN(n_input, n_output, archi, n_hid_layers,
                          scale, scale_b, clip_thres)
    param_Ws, param_bs, auxis, caches, a_0, parameters = init

    # ############ bidirectional recurrent neural network ###############
    srng = RandomStreams(seed=seed)

    # #### Hidden layers ######
    for l in range(n_hid_layers):
        if l == 0:
            a_seq = x_seq
            z_seq = T.dot(a_seq, param_Ws[0][l])
            z_seq += param_bs[0][l].dimshuffle('x', 0)
            zf_seq = z_seq
            zb_seq = z_seq
        else:
            zf_seq = T.dot(a_seq, param_Ws[1][l - 1])
            zf_seq += param_bs[1][l - 1].dimshuffle('x', 0)
            zb_seq = T.dot(a_seq, param_Ws[2][l - 1])
            zb_seq += param_bs[2][l - 1].dimshuffle('x', 0)

        step = set_step(param_Ws[3], param_bs[3], l, acti_func)
        [af_seq, ab_seq], _ = th.scan(step, sequences=[zf_seq, zb_seq[::-1]],
                                      outputs_info=[a_0, a_0],
                                      truncate_gradient=-1)

        a_out = T.concatenate([af_seq, ab_seq[::-1]], axis=1)
        dropping = srng.binomial(size=T.shape(a_out),
                                 p=(1 - dropout_rate))
        a_seq = ifelse(T.lt(stop_dropout, 1.05),
                       (a_out * dropping).astype('float32'), a_out)
        a_seq /= stop_dropout

    # #### End of Hidden layers ######

    y_pre = T.dot(a_seq, param_Ws[0][1]) + param_bs[0][1].dimshuffle('x', 0)
    y_seq = softmax(y_pre)
    forward = th.function(inputs=[x_seq, stop_dropout], outputs=y_seq)

    cost = T.sum((y_seq - y_hat)**2) + minibatch * 0
    valid = th.function(inputs=[x_seq, y_hat, minibatch, stop_dropout],
                        outputs=cost)
    grads = T.grad(cost, parameters, disconnected_inputs='ignore')

    # ############ end of construction ###############

    updates = update_func(parameters, grads, lr, minibatch,
                          batchsize, auxis, caches)
    rnn_train = th.function(inputs=[x_seq, y_hat, minibatch, stop_dropout],
                            outputs=cost, updates=updates)

    return forward, valid, rnn_train
Example #34
def construct_LSTM(n_input,
                   n_output,
                   n_hid_layers=2,
                   archi=36,
                   lr=1e-3,
                   update_by='NAG',
                   batchsize=1,
                   scale=0.01,
                   scale_b=0.001,
                   clip_thres=1.0):
    """
    Initialize and construct the bidirectional Long Short-term Memory (LSTM)
    Update the LSTM using minibatch and RMSProp
    archi: number of neurons of each hidden layer
    """
    x_seq = T.fmatrix()
    y_hat = T.fmatrix()
    minibatch = T.scalar()

    # choose the optimization function
    optimiz_func = {
        'sgd': sgd,
        'momentum': momentum,
        'NAG': NAG,
        'RMSProp': RMSProp,
    }
    update_func = optimiz_func[update_by]

    # initialize the LSTM
    print('Start initializing LSTM...')
    init = initialize_LSTM(n_input, n_output, archi, n_hid_layers, scale,
                           scale_b, clip_thres)
    param_Ws, param_bs, auxis, caches, a_0, h_0, parameters = init

    # ############ bidirectional Long Short-term Memory ###############

    # #### Hidden layers ######
    for l in range(n_hid_layers):
        # computing gates
        if l == 0:
            a_seq = x_seq
            W, Wi, Wf, Wo = param_Ws[0][l][:-1]
            b, bi, bf, bo = param_bs[0][l]
            z_seq = T.dot(a_seq, W) + b.dimshuffle('x', 0)
            zi_seq = T.dot(a_seq, Wi) + bi.dimshuffle('x', 0)
            zf_seq = T.dot(a_seq, Wf) + bf.dimshuffle('x', 0)
            zo_seq = T.dot(a_seq, Wo) + bo.dimshuffle('x', 0)

            zf_seq, zif_seq, zff_seq, zof_seq = z_seq, zi_seq, zf_seq, zo_seq
            zb_seq, zib_seq, zfb_seq, zob_seq = z_seq, zi_seq, zf_seq, zo_seq
        else:
            # forward gates
            W_f, Wi_f, Wf_f, Wo_f = param_Ws[1][l - 1]
            b_f, bi_f, bf_f, bo_f = param_bs[1][l - 1]
            zf_seq = T.dot(a_seq, W_f) + b_f.dimshuffle('x', 0)
            zif_seq = T.dot(a_seq, Wi_f) + bi_f.dimshuffle('x', 0)
            zff_seq = T.dot(a_seq, Wf_f) + bf_f.dimshuffle('x', 0)
            zof_seq = T.dot(a_seq, Wo_f) + bo_f.dimshuffle('x', 0)

            # backward gates
            W_b, Wi_b, Wf_b, Wo_b = param_Ws[2][l - 1]
            b_b, bi_b, bf_b, bo_b = param_bs[2][l - 1]
            zb_seq = T.dot(a_seq, W_b) + b_b.dimshuffle('x', 0)
            zib_seq = T.dot(a_seq, Wi_b) + bi_b.dimshuffle('x', 0)
            zfb_seq = T.dot(a_seq, Wf_b) + bf_b.dimshuffle('x', 0)
            zob_seq = T.dot(a_seq, Wo_b) + bo_b.dimshuffle('x', 0)

        # computing cells
        step = set_step(param_Ws[3][l], param_Ws[4][l])

        # Forward direction
        seqs = [zf_seq, zif_seq, zff_seq, zof_seq]
        [cf_seq, hf_seq], _ = th.scan(step,
                                      sequences=seqs,
                                      outputs_info=[a_0, h_0],
                                      truncate_gradient=-1)

        # Backward direction
        seqs = [zb_seq[::-1], zib_seq[::-1], zfb_seq[::-1], zob_seq[::-1]]
        [cb_seq, hb_seq], _ = th.scan(step,
                                      sequences=seqs,
                                      outputs_info=[a_0, h_0],
                                      truncate_gradient=-1)

        a_seq = T.concatenate([hf_seq, hb_seq[::-1]], axis=1)

    # #### End of Hidden layers ######
    y_seq = softmax(T.dot(a_seq, param_Ws[0][0][-1]))
    forward = th.function(inputs=[x_seq], outputs=y_seq)

    cost = T.sum((y_seq - y_hat)**2) + minibatch * 0
    valid = th.function(inputs=[x_seq, y_hat, minibatch], outputs=cost)
    grads = T.grad(cost, parameters, disconnected_inputs='ignore')
    forward_grad = th.function([x_seq, y_hat, minibatch], grads)

    # ############ end of construction ###############

    updates = update_func(parameters, grads, lr, minibatch, batchsize, auxis,
                          caches)
    lstm_train = th.function(inputs=[x_seq, y_hat, minibatch],
                             outputs=cost,
                             updates=updates)

    return forward, valid, lstm_train, forward_grad
Example #35
    X_test = X_test.astype(np.float32) / 255
    r = np.random.permutation(len(y_train))
    X_train = X_train[r]
    y_train = y_train[r]
    X_dev = X_train[:12000]
    y_dev = y_train[:12000]
    X_train = X_train[10000:]
    y_train = y_train[10000:]

    LOG.info("finish data preprocessing.")

    FCs = [
        FullyConnected(784, 256, opts.batch_size, relu()),
        FullyConnected(256, 128, opts.batch_size, relu()),
        FullyConnected(128, 64, opts.batch_size, relu()),
        FullyConnected(64, 10, opts.batch_size, softmax())
    ]

    LOG.info("finish initialization.")

    n_samples = len(y_train)
    order = np.arange(n_samples)
    best_precision, test_precision = 0, 0
    for epochs in range(0, opts.epochs):
        np.random.shuffle(order)
        cost = 0.
        for batch_start in range(0, n_samples, opts.batch_size):
            batch_end = batch_start + opts.batch_size if batch_start \
                        + opts.batch_size < n_samples else n_samples
            batch_id = order[batch_start:batch_end]
            xs, ys = X_train[batch_id], y_train[batch_id]