Example 1
    def train(self, X, y, learning_rate=1e-3, reg=1e-4, decay_rate=1.00, opt='sgd', n_iters=1000,
              batch_size=200, verbose=True):
        lr = learning_rate
        self.reg = reg
        for i in range(n_iters):
            ids = np.random.choice(X.shape[0], batch_size)
            layer1, l1cache = layers.dense_forward(X[ids], self.W1, self.b1)
            layer2, l2cache = layers.non_linearity_forward(layer1, hiddenLayer='relu')
            layer3, l3cache = layers.dense_forward(layer2, self.W2, self.b2)
            layer4, l4cache = layers.non_linearity_forward(layer3, hiddenLayer='sigmoid')
            loss, l5cache = layers.binary_cross_entropy_loss_forward(layer4, y[ids])

            # add the L2 regularization term on the hidden activations to the loss
            loss += 0.5 * self.reg * np.sum(layer2 * layer2) / (batch_size * batch_size)

            dlayer5 = 1.0
            dlayer4 = layers.binary_cross_entropy_loss_backward(dlayer5, l5cache)
            dlayer3 = layers.non_linearity_backward(dlayer4, l4cache, hiddenLayer='sigmoid')
            dlayer2, dW2, db2 = layers.dense_backward(dlayer3, l3cache)
            dlayer2 += (self.reg * layer2) / (batch_size * batch_size)  # gradient of the regularization term above
            dlayer1 = layers.non_linearity_backward(dlayer2, l2cache, hiddenLayer='relu')
            _, dW1, db1 = layers.dense_backward(dlayer1, l1cache)

            if i % 500 == 0:
                lr *= decay_rate
                if verbose:
                    print "Iteration %d, loss = %g" % (i, loss)

            self.params, self.W1 = optimizers.optimize(self.params, self.W1, dW1, lr=lr, name='W1', opt=opt)
            self.params, self.b1 = optimizers.optimize(self.params, self.b1, db1, lr=lr, name='b1', opt=opt)
            self.params, self.W2 = optimizers.optimize(self.params, self.W2, dW2, lr=lr, name='W2', opt=opt)
            self.params, self.b2 = optimizers.optimize(self.params, self.b2, db2, lr=lr, name='b2', opt=opt)

            self.loss_history.append(loss)
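The optimizers.optimize calls above take (params, weight, grad, lr=..., name=..., opt=...) and return the updated state dict together with the updated weight. The snippet below is only a minimal sketch of a helper with that interface, inferred from the call sites; it is an assumption, not the repository's actual optimizers module, and the 'momentum' branch is purely illustrative.

# Sketch (assumption) of an optimize() helper consistent with the call sites above:
# per-parameter optimizer state is kept in `params`, keyed by `name`, and the
# updated state dict is returned together with the updated weight.
import numpy as np

def optimize(params, w, dw, lr=1e-3, name='W', opt='sgd', momentum=0.9):
    if opt == 'sgd':
        w = w - lr * dw                             # plain gradient descent step
    elif opt == 'momentum':
        v = params.get(name, np.zeros_like(w))      # per-parameter velocity
        v = momentum * v - lr * dw
        params[name] = v
        w = w + v
    else:
        raise ValueError('unknown optimizer: %s' % opt)
    return params, w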
Example 2
    def getloss(self, X, y):
        layer1, _ = layers.dense_forward(X, self.W1, self.b1)
        layer2, _ = layers.non_linearity_forward(layer1, hiddenLayer='relu')
        layer3, _ = layers.dense_forward(layer2, self.W2, self.b2)
        layer4, _ = layers.non_linearity_forward(layer3, hiddenLayer='sigmoid')
        loss, _ = layers.binary_cross_entropy_loss_forward(layer4, y)
        loss += 0.5 * self.reg * (np.sum(self.W1 * self.W1) + np.sum(self.W2 * self.W2))
        return loss
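All of these examples lean on the same layers.dense_forward / layers.dense_backward pair. Below is a sketch of what such a pair might look like, inferred only from the call sites (out, cache = dense_forward(x, W, b) and dx, dW, db = dense_backward(dout, cache)); it is not taken from the repository's layers module.

import numpy as np

# Hypothetical dense (fully connected) layer consistent with the call sites above.
def dense_forward(x, W, b):
    out = x.dot(W) + b          # affine transform: (N, D) x (D, H) + (H,)
    cache = (x, W, b)
    return out, cache

def dense_backward(dout, cache):
    x, W, b = cache
    dx = dout.dot(W.T)          # gradient w.r.t. the layer input
    dW = x.T.dot(dout)          # gradient w.r.t. the weights
    db = dout.sum(axis=0)       # gradient w.r.t. the bias
    return dx, dW, db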
Example 3
    def predict(self, X):

        # forward pass; return the index of the highest-scoring class for each row
        l1out, _ = layers.dense_forward(X, self.W1, self.b1)
        l2out, _ = layers.non_linearity_forward(l1out, self.hiddenLayer)
        l3out, _ = layers.dense_forward(l2out, self.W2, self.b2)
        l4out, _ = layers.non_linearity_forward(l3out, self.hiddenLayer)
        l5out, _ = layers.dense_forward(l4out, self.W3, self.b3)
        return np.argmax(l5out, axis=1)
Example 4
    def predict(self, X):

        W1, b1 = self.weights['W1'], self.weights['b1']
        W2, b2 = self.weights['W2'], self.weights['b2']
        W3, b3 = self.weights['W3'], self.weights['b3']

        # forward pass; return the index of the highest-scoring class for each row
        l1out, _ = layers.dense_forward(X, W1, b1)
        l2out, _ = layers.non_linearity_forward(l1out, self.non_linearity)
        l3out, _ = layers.dense_forward(l2out, W2, b2)
        l4out, _ = layers.non_linearity_forward(l3out, self.non_linearity)
        l5out, _ = layers.dense_forward(l4out, W3, b3)
        return np.argmax(l5out, axis=1)
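The predict methods in Examples 3 and 4 differ only in where the weights are stored; both rely on layers.non_linearity_forward, and the training code also needs its backward counterpart. Below is a sketch of an activation pair consistent with those call sites; treat it as an illustration, not the repository's code.

import numpy as np

def non_linearity_forward(x, hiddenLayer='relu'):
    if hiddenLayer == 'relu':
        out = np.maximum(0, x)
    elif hiddenLayer == 'sigmoid':
        out = 1.0 / (1.0 + np.exp(-x))
    elif hiddenLayer == 'tanh':
        out = np.tanh(x)
    else:
        raise ValueError('unknown non-linearity: %s' % hiddenLayer)
    cache = (x, out, hiddenLayer)
    return out, cache

def non_linearity_backward(dout, cache, hiddenLayer='relu'):
    x, out, _ = cache
    if hiddenLayer == 'relu':
        return dout * (x > 0)
    if hiddenLayer == 'sigmoid':
        return dout * out * (1.0 - out)
    return dout * (1.0 - out ** 2)      # tanh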
Example 5
    def train(self, X, y, X_val=None, y_val=None, learning_rate=1e-2, reg=1e-4, decay_rate=0.95, opt='sgd',
              n_iters=5000, batch_size=200, verbose=1):
        lr = learning_rate
        for i in range(n_iters):
            ids = np.random.choice(X.shape[0], batch_size)   # sample a minibatch
            # dense layer 1
            l1out, l1cache = layers.dense_forward(X[ids], self.W1, self.b1)
            # non-linearity layer 2
            l2out, l2cache = layers.non_linearity_forward(l1out, self.hiddenLayer)
            # dense layer 3
            l3out, l3cache = layers.dense_forward(l2out, self.W2, self.b2)
            # non-linearity layer 4
            l4out, l4cache = layers.non_linearity_forward(l3out, self.hiddenLayer)
            # dense layer 5
            l5out, l5cache = layers.dense_forward(l4out, self.W3, self.b3)
            # softmax loss layer
            loss, l6cache = layers.softmax_loss_forward(l5out, y[ids])
            loss = loss + 0.5*reg*(np.sum(self.W1**2) + np.sum(self.W2**2) + np.sum(self.W3**2))
            self.loss_history.append(loss)
            if i % 500 == 0:
                lr *= decay_rate
                if verbose:
                    print("Iteration %d, loss = %f" % (i, loss))
                    if X_val is not None and y_val is not None:
                        print("Validation Accuracy: %f" % self.accuracy(X_val, y_val))

            dlayer6 = 1.0
            dlayer5 = layers.softmax_loss_backward(dlayer6, l6cache)
            dlayer4, dW3, db3 = layers.dense_backward(dlayer5, l5cache)
            dlayer3 = layers.non_linearity_backward(dlayer4, l4cache, self.hiddenLayer)
            dlayer2, dW2, db2 = layers.dense_backward(dlayer3, l3cache)
            dlayer1 = layers.non_linearity_backward(dlayer2, l2cache, self.hiddenLayer)
            _, dW1, db1 = layers.dense_backward(dlayer1, l1cache)

            self.gradientLayer1.append(np.mean(np.abs(dlayer1)))
            self.gradientLayer2.append(np.mean(np.abs(dlayer3)))

            # gradient of the L2 regularization term (d/dW of 0.5*reg*||W||^2 is reg*W),
            # added to the weight gradients before the parameter update
            dW1 += reg * self.W1
            dW2 += reg * self.W2
            dW3 += reg * self.W3

            self.params, self.W1 = optimizers.optimize(self.params, self.W1, dW1, lr=lr, name='W1', opt=opt)
            self.params, self.b1 = optimizers.optimize(self.params, self.b1, db1, lr=lr, name='b1', opt=opt)
            self.params, self.W2 = optimizers.optimize(self.params, self.W2, dW2, lr=lr, name='W2', opt=opt)
            self.params, self.b2 = optimizers.optimize(self.params, self.b2, db2, lr=lr, name='b2', opt=opt)
            self.params, self.W3 = optimizers.optimize(self.params, self.W3, dW3, lr=lr, name='W3', opt=opt)
            self.params, self.b3 = optimizers.optimize(self.params, self.b3, db3, lr=lr, name='b3', opt=opt)
Example 6
def rnn_step_forward(x, prev_h, Wx, Wh, b, non_liniearity='tanh'):
    """
    Run the forward pass for a single timestep of a vanilla RNN that uses the specified
    activation function.

    The input data has dimension D, the hidden state has dimension H, and we use
    a minibatch size of N.
    :param x: Input data for this timestep, of shape (N, D)
    :param prev_h: Hidden state from previous timestep, of shape (N, H)
    :param Wx: Weight matrix for input-to-hidden connections, of shape (D, H)
    :param Wh: Weight matrix for hidden-to-hidden connections, of shape (H, H)
    :param b: Biases of shape (H,)
    :param non_liniearity: the non-linearity to use: 'relu', 'sigmoid', or 'tanh'
    :return: next_h: Next hidden state, of shape (N, H)
             cache: Tuple of values needed for the backward pass.
    """
    tmp = np.dot(x, Wx) + np.dot(prev_h, Wh) + b
    next_h, _ = layers.non_linearity_forward(tmp, hiddenLayer=non_liniearity)
    cache = (x, prev_h, Wx, Wh, b, non_liniearity)
    return next_h, cache
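A small usage sketch for rnn_step_forward, assuming the shapes from the docstring (N = minibatch size, D = input dimension, H = hidden size) and that the layers module used above is importable; the variable names and sizes here are made up for illustration.

import numpy as np

N, D, H = 4, 10, 8
x      = np.random.randn(N, D)       # one timestep of input
prev_h = np.random.randn(N, H)       # previous hidden state
Wx     = np.random.randn(D, H)
Wh     = np.random.randn(H, H)
b      = np.zeros(H)

next_h, cache = rnn_step_forward(x, prev_h, Wx, Wh, b)   # default 'tanh' non-linearity
print(next_h.shape)                  # (4, 8)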
Example 7
    def train(self,
              X,
              y,
              X_val=None,
              y_val=None,
              learning_rate=1e-2,
              reg=1e-4,
              decay_rate=0.95,
              opt='sgd',
              n_iters=5000,
              batch_size=200,
              verbose=1):
        lr = learning_rate
        for i in range(n_iters):

            W1, b1 = self.weights['W1'], self.weights['b1']
            W2, b2 = self.weights['W2'], self.weights['b2']
            W3, b3 = self.weights['W3'], self.weights['b3']

            # dense layer1
            ids = np.random.choice(X.shape[0], batch_size)
            l1out, l1cache = layers.dense_forward(X[ids], W1, b1)
            # non-linearity layer2
            l2out, l2cache = layers.non_linearity_forward(
                l1out, self.non_linearity)
            # dense layer3
            l3out, l3cache = layers.dense_forward(l2out, W2, b2)
            # non-linearity layer4
            l4out, l4cache = layers.non_linearity_forward(
                l3out, self.non_linearity)
            # dense layer5
            l5out, l5cache = layers.dense_forward(l4out, W3, b3)
            # softmax layer
            loss, l6cache = layers.softmax_loss_forward(l5out, y[ids])
            loss = loss + 0.5 * reg * (np.sum(W1**2) + np.sum(W2**2) +
                                       np.sum(W3**2))
            self.loss_history.append(loss)
            if i % 500 == 0:
                lr *= decay_rate
                if verbose:
                    print("Iteration %d, loss = %f" % (i, loss))
                    if X_val is not None and y_val is not None:
                        print("Validation Accuracy: %f" %
                              self.accuracy(X_val, y_val))

            dlayer6 = 1.0
            dlayer5 = layers.softmax_loss_backward(dlayer6, l6cache)
            dlayer4, dW3, db3 = layers.dense_backward(dlayer5, l5cache)
            dlayer3 = layers.non_linearity_backward(dlayer4, l4cache,
                                                    self.non_linearity)
            dlayer2, dW2, db2 = layers.dense_backward(dlayer3, l3cache)
            dlayer1 = layers.non_linearity_backward(dlayer2, l2cache,
                                                    self.non_linearity)
            _, dW1, db1 = layers.dense_backward(dlayer1, l1cache)

            self.gradientLayer1.append(np.mean(np.abs(dlayer1)))
            self.gradientLayer2.append(np.mean(np.abs(dlayer3)))

            # gradients due to L2 regularization (d/dW of 0.5*reg*||W||^2 is reg*W)
            dW1 += reg * W1
            dW2 += reg * W2
            dW3 += reg * W3

            self.params, W1 = optimizers.optimize(self.params, W1, dW1, lr=lr, name='W1', opt=opt)
            self.params, b1 = optimizers.optimize(self.params, b1, db1, lr=lr, name='b1', opt=opt)
            self.params, W2 = optimizers.optimize(self.params, W2, dW2, lr=lr, name='W2', opt=opt)
            self.params, b2 = optimizers.optimize(self.params, b2, db2, lr=lr, name='b2', opt=opt)
            self.params, W3 = optimizers.optimize(self.params, W3, dW3, lr=lr, name='W3', opt=opt)
            self.params, b3 = optimizers.optimize(self.params, b3, db3, lr=lr, name='b3', opt=opt)

            self.weights['W1'], self.weights['b1'] = W1, b1
            self.weights['W2'], self.weights['b2'] = W2, b2
            self.weights['W3'], self.weights['b3'] = W3, b3
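Example 7's forward pass ends in layers.softmax_loss_forward, and the backward pass is seeded with dlayer6 = 1.0. The pair below is only a guess at an implementation consistent with that interface (mean cross-entropy over the minibatch); it is not the repository's actual layers code.

import numpy as np

def softmax_loss_forward(scores, y):
    shifted = scores - scores.max(axis=1, keepdims=True)   # subtract max for numerical stability
    probs = np.exp(shifted)
    probs /= probs.sum(axis=1, keepdims=True)
    N = scores.shape[0]
    loss = -np.log(probs[np.arange(N), y]).mean()          # mean cross-entropy loss
    cache = (probs, y)
    return loss, cache

def softmax_loss_backward(dloss, cache):
    probs, y = cache
    N = probs.shape[0]
    dscores = probs.copy()
    dscores[np.arange(N), y] -= 1.0                        # softmax-minus-one-hot gradient
    return dloss * dscores / N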
Example 8
    def predict(self, X):
        # forward pass; returns the sigmoid output (probabilities), not class labels
        l1, _ = layers.dense_forward(X, self.W1, self.b1)
        l2, _ = layers.non_linearity_forward(l1, hiddenLayer='relu')
        l3, _ = layers.dense_forward(l2, self.W2, self.b2)
        l4, _ = layers.non_linearity_forward(l3, hiddenLayer='sigmoid')
        return l4
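Unlike the predict methods in Examples 3 and 4, this one returns the raw sigmoid output rather than class indices. A hypothetical usage, assuming a trained instance named model and a test matrix X_test:

probs = model.predict(X_test)            # sigmoid probabilities in [0, 1]
labels = (probs >= 0.5).astype(int)      # threshold at 0.5 to get binary class labels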