Example #1
    def train(self, X, y, learning_rate=1e-3, reg=1e-4, decay_rate=1.00, opt='sgd', n_iters=1000,
              batch_size=200, verbose=True):
        lr = learning_rate
        self.reg = reg
        for i in range(n_iters):
            ids = np.random.choice(X.shape[0], batch_size)
            layer1, l1cache = layers.dense_forward(X[ids], self.W1, self.b1)
            layer2, l2cache = layers.non_linearity_forward(layer1, hiddenLayer='relu')
            layer3, l3cache = layers.dense_forward(layer2, self.W2, self.b2)
            layer4, l4cache = layers.non_linearity_forward(layer3, hiddenLayer='sigmoid')
            loss, l5cache = layers.binary_cross_entropy_loss_forward(layer4, y[ids])

            # add L2 regularization on the hidden activations
            # (scaled by 1/batch_size to match the backward pass below)
            loss += 0.5 * self.reg * np.sum(layer2 * layer2) / batch_size

            dlayer5 = 1.0
            dlayer4 = layers.binary_cross_entropy_loss_backward(dlayer5, l5cache)
            dlayer3 = layers.non_linearity_backward(dlayer4, l4cache, hiddenLayer='sigmoid')
            dlayer2, dW2, db2 = layers.dense_backward(dlayer3, l3cache)
            dlayer2 += (self.reg * layer2) / batch_size  # gradient of the activation regularizer
            dlayer1 = layers.non_linearity_backward(dlayer2, l2cache, hiddenLayer='relu')
            _, dW1, db1 = layers.dense_backward(dlayer1, l1cache)

            if i % 500 == 0:
                lr *= decay_rate
                if verbose:
                    print "Iteration %d, loss = %g" % (i, loss)

            self.params, self.W1 = optimizers.optimize(self.params, self.W1, dW1, lr=lr, name='W1', opt=opt)
            self.params, self.b1 = optimizers.optimize(self.params, self.b1, db1, lr=lr, name='b1', opt=opt)
            self.params, self.W2 = optimizers.optimize(self.params, self.W2, dW2, lr=lr, name='W2', opt=opt)
            self.params, self.b2 = optimizers.optimize(self.params, self.b2, db2, lr=lr, name='b2', opt=opt)

            self.loss_history.append(loss)
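
A minimal usage sketch for the trainer above. It assumes a hypothetical TwoLayerNet class whose constructor initializes W1, b1, W2, b2, an empty params dict for optimizer state, and a loss_history list; the class name and constructor arguments are assumptions, not taken from the source.

import numpy as np

# toy binary-classification problem (shapes are illustrative)
X = np.random.randn(1000, 20)
y = np.random.randint(0, 2, size=1000)

net = TwoLayerNet(input_dim=20, hidden_dim=50)  # hypothetical constructor
net.train(X, y, learning_rate=1e-3, reg=1e-4, n_iters=1000, batch_size=200)
print("final loss:", net.loss_history[-1])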
Example #2
 def getloss(self, X, y):
     layer1, _ = layers.dense_forward(X, self.W1, self.b1)
     layer2, _ = layers.non_linearity_forward(layer1, hiddenLayer='relu')
     layer3, _ = layers.dense_forward(layer2, self.W2, self.b2)
     layer4, _ = layers.non_linearity_forward(layer3, hiddenLayer='sigmoid')
     loss, _ = layers.binary_cross_entropy_loss_forward(layer4, y)
     loss += 0.5 * self.reg * (np.sum(self.W1 * self.W1) + np.sum(self.W2 * self.W2))
     return loss
Example #3
    def predict(self, X):

        # return the index of the highest-scoring class for each row after a forward pass
        l1out, _ = layers.dense_forward(X, self.W1, self.b1)
        l2out, _ = layers.non_linearity_forward(l1out, self.hiddenLayer)
        l3out, _ = layers.dense_forward(l2out, self.W2, self.b2)
        l4out, _ = layers.non_linearity_forward(l3out, self.hiddenLayer)
        l5out, _ = layers.dense_forward(l4out, self.W3, self.b3)
        return np.argmax(l5out, axis=1)
Example #4
    def predict(self, X):

        W1, b1 = self.weights['W1'], self.weights['b1']
        W2, b2 = self.weights['W2'], self.weights['b2']
        W3, b3 = self.weights['W3'], self.weights['b3']

        # return the index of the highest-scoring class for each row after a forward pass
        l1out, _ = layers.dense_forward(X, W1, b1)
        l2out, _ = layers.non_linearity_forward(l1out, self.non_linearity)
        l3out, _ = layers.dense_forward(l2out, W2, b2)
        l4out, _ = layers.non_linearity_forward(l3out, self.non_linearity)
        l5out, _ = layers.dense_forward(l4out, W3, b3)
        return np.argmax(l5out, axis=1)
Example #5
    def train(self, X, y, X_val=None, y_val=None, learning_rate=1e-2, reg=1e-4, decay_rate=0.95, opt='sgd',
              n_iters=5000, batch_size=200, verbose=1):
        lr = learning_rate
        for i in range(n_iters):
            # dense layer 1
            ids = np.random.choice(X.shape[0], batch_size)
            l1out, l1cache = layers.dense_forward(X[ids], self.W1, self.b1)
            # non-linearity layer 2
            l2out, l2cache = layers.non_linearity_forward(l1out, self.hiddenLayer)
            # dense layer 3
            l3out, l3cache = layers.dense_forward(l2out, self.W2, self.b2)
            # non-linearity layer 4
            l4out, l4cache = layers.non_linearity_forward(l3out, self.hiddenLayer)
            # dense layer 5
            l5out, l5cache = layers.dense_forward(l4out, self.W3, self.b3)
            # softmax loss
            loss, l6cache = layers.softmax_loss_forward(l5out, y[ids])
            loss = loss + 0.5*reg*(np.sum(self.W1**2) + np.sum(self.W2**2) + np.sum(self.W3**2))
            self.loss_history.append(loss)
            if i % 500 == 0:
                lr *= decay_rate
                if verbose:
                    print("Iteration %d, loss = %f" % (i, loss))
                    if X_val is not None and y_val is not None:
                        print("Validation Accuracy: %f" % self.accuracy(X_val, y_val))

            dlayer6 = 1.0
            dlayer5 = layers.softmax_loss_backward(dlayer6, l6cache)
            dlayer4, dW3, db3 = layers.dense_backward(dlayer5, l5cache)
            dlayer3 = layers.non_linearity_backward(dlayer4, l4cache, self.hiddenLayer)
            dlayer2, dW2, db2 = layers.dense_backward(dlayer3, l3cache)
            dlayer1 = layers.non_linearity_backward(dlayer2, l2cache, self.hiddenLayer)
            _, dW1, db1 = layers.dense_backward(dlayer1, l1cache)

            self.gradientLayer1.append(np.mean(np.abs(dlayer1)))
            self.gradientLayer2.append(np.mean(np.abs(dlayer3)))

            # gradients due to regularization, applied before the parameter update
            dW1 += reg * self.W1
            dW2 += reg * self.W2
            dW3 += reg * self.W3

            self.params, self.W1 = optimizers.optimize(self.params, self.W1, dW1, lr=lr, name='W1', opt=opt)
            self.params, self.b1 = optimizers.optimize(self.params, self.b1, db1, lr=lr, name='b1', opt=opt)
            self.params, self.W2 = optimizers.optimize(self.params, self.W2, dW2, lr=lr, name='W2', opt=opt)
            self.params, self.b2 = optimizers.optimize(self.params, self.b2, db2, lr=lr, name='b2', opt=opt)
            self.params, self.W3 = optimizers.optimize(self.params, self.W3, dW3, lr=lr, name='W3', opt=opt)
            self.params, self.b3 = optimizers.optimize(self.params, self.b3, db3, lr=lr, name='b3', opt=opt)
Example #6
 def predict(self, X):
     N, T, D = X.shape
     h0 = np.zeros((N, self.hidden_dim))
     layer1, l1cache = rnn_layers.rnn_forward(X, h0, self.Wx, self.Wh,
                                              self.b, self.non_liniearity)
     final_layer = layer1[:, T - 1, :]  # hidden state at the last timestep
     layer2, _ = layers.dense_forward(final_layer, self.W1, self.b1)
     return np.argmax(layer2, axis=1)
Example #7
def test_denselayer():
    x = np.random.randn(10, 6)
    w = np.random.randn(6, 5)
    b = np.random.randn(5)
    dout = np.random.randn(10, 5)

    dx_num = eval_numerical_gradient_array(lambda x: layers.dense_forward(x, w, b)[0], x, dout)
    dw_num = eval_numerical_gradient_array(lambda w: layers.dense_forward(x, w, b)[0], w, dout)
    db_num = eval_numerical_gradient_array(lambda b: layers.dense_forward(x, w, b)[0], b, dout)

    _, cache = layers.dense_forward(x, w, b)
    dx, dw, db = layers.dense_backward(dout, cache)

    # The relative error should be around 1e-10
    print('Testing dense layers:')
    print('dx error: ', rel_error(dx_num, dx))
    print('dw error: ', rel_error(dw_num, dw))
    print('db error: ', rel_error(db_num, db))
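
For reference, here is a sketch of the two helpers the test depends on, written in the style popularized by the cs231n assignments; the repository's own rel_error and eval_numerical_gradient_array may differ in detail.

import numpy as np

def rel_error(x, y):
    # max relative error, guarded against division by zero
    return np.max(np.abs(x - y) / np.maximum(1e-8, np.abs(x) + np.abs(y)))

def eval_numerical_gradient_array(f, x, df, h=1e-5):
    # centered-difference numeric gradient of f at x, contracted with upstream df
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        oldval = x[ix]
        x[ix] = oldval + h
        pos = f(x).copy()
        x[ix] = oldval - h
        neg = f(x).copy()
        x[ix] = oldval
        grad[ix] = np.sum((pos - neg) * df) / (2 * h)
        it.iternext()
    return grad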
Example #9
    def train(self, X, learning_rate=1e-2, batch_size=100, nb_epochs=1):
        """
        Training based on CBOW model using negative sampling
        :param nb_epochs: number of iterations
        :param batch_size: the number of sentences trained upon in 1 iteration
        :param learning_rate:
        :param X: list of sentences used for training
        """
        N = len(X)
        start_index = self.word_to_index[self.start_token]
        end_index = self.word_to_index[self.end_token]
        unknown_index = self.word_to_index[self.unknown_token]
        id_x = []
        for i in range(N):
            sentence = nltk.word_tokenize(X[i])
            if len(sentence) == 0:
                continue
            id_x.append(start_index)
            for word in sentence:
                if word in self.word_to_index:
                    id_x.append(self.word_to_index[word])
                else:
                    id_x.append(unknown_index)
            id_x.append(end_index)

        corpus_size = len(id_x)
        print(corpus_size)

        n_iters = corpus_size//batch_size

        for epoch in range(nb_epochs):
            for itr in range(n_iters):
                batch = np.random.randint(corpus_size, size=batch_size)
                trX = np.zeros([batch_size, self.size])
                trY = np.zeros([batch_size], dtype=np.int32)
                context = []
                ids_to_update = np.zeros([batch_size], dtype=np.int32)
                for row, w_id in enumerate(batch):
                    context_ids = (id_x[max(0, w_id - self.window):w_id] +
                                   id_x[w_id + 1:min(w_id + 1 + self.window, corpus_size)])
                    context.append(context_ids)
                    context_window = np.array(context_ids)
                    trX[row] = np.mean(self.W_inp[context_window, :], axis=0)
                    # each example's target word occupies its own row of the sampled
                    # output matrix below, so the softmax label is the row index itself
                    trY[row] = row
                    ids_to_update[row] = id_x[w_id]

                context = np.array(context)
                trX, trY, ids_to_update, context = shuffle(trX, trY, ids_to_update, context, random_state=0)
                W = self.W_out[ids_to_update]
                # print trX, trY, ids_to_update, context, W
                b = np.zeros([batch_size])
                layer1, l1cache = layers.dense_forward(trX, W.T, b)
                layer2, l2cache = layers.sigmoid_forward(layer1)
                loss, l3cache = layers.softmax_loss_forward(layer2, trY)
                self.loss_history.append(loss)

                dlayer3 = 1.0
                dlayer2 = layers.softmax_loss_backward(dlayer3, l3cache)
                dlayer1 = layers.sigmoid_backward(dlayer2, l2cache)
                dx_inp, dW_tmp, db = layers.dense_backward(dlayer1, l1cache)
                dW = dW_tmp.T

                for i in range(batch_size):
                    self.W_inp[context[i], :] -= (learning_rate * dx_inp[i])/len(context[i])
                self.W_out[ids_to_update, :] -= learning_rate * dW
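
A minimal sketch of driving this trainer, assuming a hypothetical CBOW class whose constructor builds word_to_index, the special tokens, the embedding matrices W_inp/W_out, and the window and size attributes used above (the class name and constructor arguments are assumptions):

sentences = ["the quick brown fox jumps over the lazy dog",
             "a watched pot never boils"]
model = CBOW(corpus=sentences, size=50, window=2)  # hypothetical constructor
model.train(sentences, learning_rate=1e-2, batch_size=8, nb_epochs=5)
vector = model.W_inp[model.word_to_index["fox"]]   # learned embedding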
Example #10
    def train(self,
              X,
              y,
              X_val=None,
              y_val=None,
              learning_rate=1e-2,
              reg=1e-4,
              decay_rate=0.95,
              opt='sgd',
              n_iters=5000,
              batch_size=200,
              verbose=1):
        lr = learning_rate
        for i in range(n_iters):

            W1, b1 = self.weights['W1'], self.weights['b1']
            W2, b2 = self.weights['W2'], self.weights['b2']
            W3, b3 = self.weights['W3'], self.weights['b3']

            # dense layer1
            ids = np.random.choice(X.shape[0], batch_size)
            l1out, l1cache = layers.dense_forward(X[ids], W1, b1)
            # non-linearity layer2
            l2out, l2cache = layers.non_linearity_forward(
                l1out, self.non_linearity)
            # dense layer3
            l3out, l3cache = layers.dense_forward(l2out, W2, b2)
            # non-linearity layer4
            l4out, l4cache = layers.non_linearity_forward(
                l3out, self.non_linearity)
            # dense layer5
            l5out, l5cache = layers.dense_forward(l4out, W3, b3)
            # softmax layer
            loss, l6cache = layers.softmax_loss_forward(l5out, y[ids])
            loss = loss + 0.5 * reg * (np.sum(W1**2) + np.sum(W2**2) +
                                       np.sum(W3**2))
            self.loss_history.append(loss)
            if i % 500 == 0:
                lr *= decay_rate
                if verbose:
                    print("Iteration %d, loss = %f" % (i, loss))
                    if X_val is not None and y_val is not None:
                        print("Validation Accuracy: %f" % self.accuracy(X_val, y_val))

            dlayer6 = 1.0
            dlayer5 = layers.softmax_loss_backward(dlayer6, l6cache)
            dlayer4, dW3, db3 = layers.dense_backward(dlayer5, l5cache)
            dlayer3 = layers.non_linearity_backward(dlayer4, l4cache,
                                                    self.non_linearity)
            dlayer2, dW2, db2 = layers.dense_backward(dlayer3, l3cache)
            dlayer1 = layers.non_linearity_backward(dlayer2, l2cache,
                                                    self.non_linearity)
            _, dW1, db1 = layers.dense_backward(dlayer1, l1cache)

            self.gradientLayer1.append(np.mean(np.abs(dlayer1)))
            self.gradientLayer2.append(np.mean(np.abs(dlayer3)))

            # gradients due to regularization
            dW1 += reg * W1
            dW2 += reg * W2
            dW3 += reg * W3

            self.params, W1 = optimizers.optimize(self.params, W1, dW1, lr=lr, name='W1', opt=opt)
            self.params, b1 = optimizers.optimize(self.params, b1, db1, lr=lr, name='b1', opt=opt)
            self.params, W2 = optimizers.optimize(self.params, W2, dW2, lr=lr, name='W2', opt=opt)
            self.params, b2 = optimizers.optimize(self.params, b2, db2, lr=lr, name='b2', opt=opt)
            self.params, W3 = optimizers.optimize(self.params, W3, dW3, lr=lr, name='W3', opt=opt)
            self.params, b3 = optimizers.optimize(self.params, b3, db3, lr=lr, name='b3', opt=opt)

            self.weights['W1'], self.weights['b1'] = W1, b1
            self.weights['W2'], self.weights['b2'] = W2, b2
            self.weights['W3'], self.weights['b3'] = W3, b3
Example #11
    def train(self,
              X,
              y,
              learning_rate=1e-2,
              opt='sgd',
              n_iters=5000,
              batch_size=200,
              verbose=1):
        lr = learning_rate
        N, T, D = X.shape
        for i in range(n_iters):
            ids = np.random.choice(X.shape[0], batch_size)
            h0 = np.zeros((batch_size, self.hidden_dim))
            layer1, l1cache = rnn_layers.rnn_forward(X[ids], h0, self.Wx,
                                                     self.Wh, self.b,
                                                     self.non_liniearity)
            final_layer = layer1[:, T - 1, :]  # hidden state at the last timestep
            layer2, l2cache = layers.dense_forward(final_layer, self.W1,
                                                   self.b1)
            loss, l3cache = layers.softmax_loss_forward(layer2, y[ids])
            self.loss_history.append(loss)

            if verbose == 1 and i % 500 == 0:
                print('Iteration %d: loss %g' % (i, loss))

            dlayer3 = 1.0
            dlayer2 = layers.softmax_loss_backward(dlayer3, l3cache)
            dlayer1, dW1, db1 = layers.dense_backward(dlayer2, l2cache)
            dh = np.zeros((batch_size, T, self.hidden_dim))
            dh[:, T - 1, :] = dlayer1
            _, _, dWx, dWh, db = rnn_layers.rnn_backward(dh, l1cache)

            self.params, self.Wx = optimizers.optimize(self.params, self.Wx, dWx, lr=lr, name='Wx', opt=opt)
            self.params, self.Wh = optimizers.optimize(self.params, self.Wh, dWh, lr=lr, name='Wh', opt=opt)
            self.params, self.b = optimizers.optimize(self.params, self.b, db, lr=lr, name='b', opt=opt)
            self.params, self.W1 = optimizers.optimize(self.params, self.W1, dW1, lr=lr, name='W1', opt=opt)
            self.params, self.b1 = optimizers.optimize(self.params, self.b1, db1, lr=lr, name='b1', opt=opt)
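
A minimal sketch of training and evaluating this RNN classifier on toy sequence data; the RNNClassifier name and its constructor arguments are assumptions (the class must initialize Wx, Wh, b, W1, b1, hidden_dim, params, loss_history, and the non_liniearity attribute used above):

import numpy as np

N, T, D, n_classes = 500, 10, 8, 3
X = np.random.randn(N, T, D)                 # toy sequences
y = np.random.randint(0, n_classes, size=N)  # toy labels

clf = RNNClassifier(input_dim=D, hidden_dim=32, n_classes=n_classes)  # hypothetical
clf.train(X, y, learning_rate=1e-2, n_iters=1000, batch_size=100)
print("train accuracy:", np.mean(clf.predict(X) == y))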
Example #12
 def predict(self, X):
     l1, _ = layers.dense_forward(X, self.W1, self.b1)
     l2, _ = layers.non_linearity_forward(l1, hiddenLayer='relu')
     l3, _ = layers.dense_forward(l2, self.W2, self.b2)
     l4, _ = layers.non_linearity_forward(l3, hiddenLayer='sigmoid')
     return l4
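
Unlike the argmax-based predict methods in the earlier examples, this one returns raw sigmoid probabilities. A caller would typically threshold them to obtain hard 0/1 labels; a minimal sketch, assuming model is an instance of the class defining this method:

probs = model.predict(X)             # values in (0, 1)
labels = (probs > 0.5).astype(int)   # hard binary predictions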