Example #1
    def train_step(self, X_train, y_train, h):
        ys, caches = [], []
        total_loss = 0
        grads = {k: np.zeros_like(v) for k, v in self.model.items()}

        # forward pass and store values for bptt
        for x, y in zip(X_train, y_train):
            y_pred, h, cache = self._forward(x, h)
            p = softmax(y_pred)
            log_likelihood = -np.log(p[range(y_pred.shape[0]), y])
            total_loss += np.sum(log_likelihood) / y_pred.shape[0]
            ys.append(y_pred)
            caches.append(cache)

        total_loss /= X_train.shape[0]

        # backprop through time
        dh_next = np.zeros((1, self.h_size))
        for t in reversed(range(len(X_train))):
            grad, dh_next = self._backward(ys[t], y_train[t], dh_next,
                                           caches[t])
            # sum up the gradients for each time step
            for k in grads.keys():
                grads[k] += grad[k]

        # clip gradients to mitigate exploding gradients
        for k, v in grads.items():
            grads[k] = np.clip(v, -5.0, 5.0)

        return total_loss, grads, h
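
A minimal sketch of how the values returned by train_step might be consumed, assuming the parameters live in the self.model dict seen above and using plain SGD; the instance name rnn, the learning rate and the epoch count are illustrative only:

import numpy as np

lr = 1e-2                                  # assumed learning rate
h = np.zeros((1, rnn.h_size))              # initial hidden state
for epoch in range(10):
    loss, grads, h = rnn.train_step(X_train, y_train, h)
    # vanilla SGD: step every parameter against its accumulated gradient
    for k in rnn.model:
        rnn.model[k] -= lr * grads[k]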
Example #2
def SoftmaxLoss(X, y):
    # average cross-entropy of the softmax probabilities against the integer labels y
    m = y.shape[0]
    p = softmax(X)
    log_likelihood = -np.log(p[range(m), y])
    loss = np.sum(log_likelihood) / m
    # gradient w.r.t. the logits: (p - one_hot(y)) / m
    dx = p.copy()
    dx[range(m), y] -= 1
    dx /= m
    return loss, dx
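
To sanity-check that the loss and gradient returned by SoftmaxLoss agree, a finite-difference comparison on random logits works well; the row-wise softmax below is only an assumed stand-in for the listing's own softmax helper:

import numpy as np

def softmax(X):
    # assumed row-wise softmax with the usual max shift for numerical stability
    e = np.exp(X - X.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

np.random.seed(0)
X = np.random.randn(4, 3)                  # 4 samples, 3 classes
y = np.array([0, 2, 1, 2])
loss, dx = SoftmaxLoss(X, y)

# numerical gradient of the loss w.r.t. a single logit
eps, i, j = 1e-6, 1, 2
Xp, Xm = X.copy(), X.copy()
Xp[i, j] += eps
Xm[i, j] -= eps
num_grad = (SoftmaxLoss(Xp, y)[0] - SoftmaxLoss(Xm, y)[0]) / (2 * eps)
print(abs(num_grad - dx[i, j]))            # should be close to zero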
Example #3
    def _backward(self, out, y, dh_next, cache):

        # values cached during the forward step
        # (assumes _forward also caches the current hidden state h)
        X_onehot, h_prev, h = cache
        Whh, Why = self.model['Whh'], self.model['Why']

        # gradient of the softmax cross-entropy w.r.t. the output logits
        dout = softmax(out)
        dout[range(len(y)), y] -= 1
        # fully connected (output layer) backward step
        dWhy = h.T @ dout
        dby = np.sum(dout, axis=0).reshape(1, -1)
        dh = dout @ Why.T
        # add the gradient flowing in from the next time step
        dh += dh_next
        # gradient through tanh
        dh = dh * (1 - h**2)
        # hidden state parameters
        dbh = dh
        dWhh = h_prev.T @ dh
        dWxh = X_onehot.T @ dh
        dh_next = dh @ Whh.T

        grads = dict(Wxh=dWxh, Whh=dWhh, Why=dWhy, bh=dbh, by=dby)

        return grads, dh_next
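
For the shapes in _backward to line up, the forward step has to cache the one-hot input, the previous hidden state and the new hidden state. A minimal sketch of such a _forward, assuming integer character indices as input and a self.vocab_size attribute; the parameter names are taken from the grads dict above, everything else is an assumption:

    def _forward(self, x, h_prev):
        # one-hot encode the input index (vocabulary size assumed in self.vocab_size)
        X_onehot = np.zeros((1, self.vocab_size))
        X_onehot[0, x] = 1.0
        # recurrent update followed by the fully connected output layer
        h = np.tanh(X_onehot @ self.model['Wxh']
                    + h_prev @ self.model['Whh']
                    + self.model['bh'])
        out = h @ self.model['Why'] + self.model['by']
        cache = (X_onehot, h_prev, h)
        return out, h, cache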
Example #4
 def predict(self, X):
     X = self.forward(X)
     return np.argmax(softmax(X), axis=1)
Example #5
 def evaluate(self, X, y):
     out = self.forward(X)
     loss, _ = self.loss_func(out, y)
     # class predictions come from the network output, not the raw input
     return np.argmax(softmax(out), axis=1), loss
Example #6
 def predict(self, X):
     X = self.forward(X)
     return softmax(X)
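
The two predict variants differ only in what they return: Example #4 collapses the softmax output to hard class indices, while Example #6 keeps the full probability distribution. A hypothetical call site, assuming a trained instance named net:

import numpy as np

probs = net.predict(X_test)                # Example #6: (n_samples, n_classes) probabilities
labels = np.argmax(probs, axis=1)          # equivalent to what Example #4 returns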