Example #1
    def bptt(self, x, y):
        # Truncated backpropagation through time: for every timestep t, take the
        # gradient of the loss at t and push it back through at most
        # self.bptt_truncate earlier timesteps.
        assert len(x) == len(y)
        output = Softmax()
        layers = self.forward_propagation(x)
        dU = np.zeros(self.U.shape)
        dV = np.zeros(self.V.shape)
        dW = np.zeros(self.W.shape)

        T = len(layers)
        prev_s_t = np.zeros(self.hidden_dim)
        diff_s = np.zeros(self.hidden_dim)  # no hidden-state gradient flows into the top-level step
        for t in range(0, T):
            # Gradient of the loss at timestep t w.r.t. the output scores.
            dmulv = output.diff(layers[t].mulv, y[t])
            input = np.zeros(self.word_dim)
            input[x[t]] = 1  # one-hot encoding of the input word
            dprev_s, dU_t, dW_t, dV_t = layers[t].backward(input, prev_s_t, self.U, self.W, self.V, diff_s, dmulv)
            prev_s_t = layers[t].s
            # For the truncated earlier steps only the hidden-state gradient
            # (dprev_s) is propagated, so the output gradient is zeroed out.
            dmulv = np.zeros(self.word_dim)
            for i in range(t-1, max(-1, t-self.bptt_truncate-1), -1):
                input = np.zeros(self.word_dim)
                input[x[i]] = 1
                prev_s_i = np.zeros(self.hidden_dim) if i == 0 else layers[i-1].s
                dprev_s, dU_i, dW_i, dV_i = layers[i].backward(input, prev_s_i, self.U, self.W, self.V, dprev_s, dmulv)
                # Accumulate the contributions of the earlier timesteps.
                dU_t += dU_i
                dW_t += dW_i
            dV += dV_t
            dU += dU_t
            dW += dW_t
        return (dU, dW, dV)
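
The method above leans on two helpers that the example does not show: output.diff(...) for the softmax loss gradient and layers[t].backward(...) for a single timestep. The following is a minimal sketch of what they might look like for a vanilla tanh RNN; the class names, stored attributes (mulu, mulw, mulv, s) and signatures are inferred from the call sites above, not taken from the original project.

import numpy as np

class Softmax:
    def predict(self, x):
        e = np.exp(x - np.max(x))              # shift for numerical stability
        return e / np.sum(e)

    def diff(self, x, y):
        # Gradient of the cross-entropy loss w.r.t. the pre-softmax scores,
        # assuming y is the index of the correct word.
        probs = self.predict(x)
        probs[y] -= 1.0
        return probs

class RNNLayer:
    def forward(self, x, prev_s, U, W, V):
        self.mulu = np.dot(U, x)               # input contribution
        self.mulw = np.dot(W, prev_s)          # recurrent contribution
        self.s = np.tanh(self.mulu + self.mulw)
        self.mulv = np.dot(V, self.s)          # unnormalized output scores

    def backward(self, x, prev_s, U, W, V, diff_s, dmulv):
        # diff_s is the hidden-state gradient arriving from the next timestep;
        # dmulv is the loss gradient w.r.t. this timestep's output scores.
        self.forward(x, prev_s, U, W, V)
        dV = np.outer(dmulv, self.s)
        ds = np.dot(V.T, dmulv) + diff_s       # total gradient reaching s_t
        dadd = (1.0 - self.s ** 2) * ds        # back through tanh
        dU = np.outer(dadd, x)
        dW = np.outer(dadd, prev_s)
        dprev_s = np.dot(W.T, dadd)            # passed on to the previous timestep
        return (dprev_s, dU, dW, dV)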
Example #2
    def bptt(self, x, y):
        # Full backpropagation through time: a single backward sweep from the
        # last timestep to the first, carrying the hidden-state gradient in
        # `delta` and accumulating the weight gradients along the way.
        assert len(x) == len(y)
        output = Softmax()
        layers = self.forward_propagation(x)
        dU = np.zeros(self.U.shape)
        dV = np.zeros(self.V.shape)
        dW = np.zeros(self.W.shape)

        T = len(layers)
        delta = np.zeros(self.hidden_dim)

        for t in range(T - 1, -1, -1):
            input = np.zeros(self.word_dim)
            input[x[t]] = 1  # one-hot encoding of the input word
            prev_s_t = np.zeros(self.hidden_dim) if t == 0 else layers[t - 1].s
            # Gradient of the loss at timestep t w.r.t. the output scores.
            dmulv = output.diff(layers[t].mulv, y[t])
            delta, dU_t, dW_t, dV_t = layers[t].backward1(
                input, prev_s_t, self.U, self.W, self.V, delta, dmulv)
            dV += dV_t
            dU += dU_t
            dW += dW_t
        return (dU, dW, dV)
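
Example #1 re-derives a truncated gradient at every timestep, while this version makes one backward pass over the whole sequence, threading the hidden-state gradient through delta. In either case the returned (dU, dW, dV) triple is typically fed into a gradient-descent update; a minimal usage sketch follows, where model, learning_rate, and the training pair (x, y) are assumptions for illustration rather than part of the original code.

def sgd_step(model, x, y, learning_rate=0.005):
    # One stochastic-gradient-descent update using the gradients from bptt().
    dU, dW, dV = model.bptt(x, y)
    model.U -= learning_rate * dU
    model.W -= learning_rate * dW
    model.V -= learning_rate * dV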
Example #3
    def train(self,
              X,
              y,
              num_passes=20000,
              epsilon=0.01,
              reg_lambda=0.01,
              print_loss=False):
        mulGate = MultiplyGate()
        addGate = AddGate()
        layer = Tanh()
        softmaxOutput = Softmax()

        for epoch in range(num_passes):
            # Forward propagation: each layer computes W·x + b followed by tanh,
            # caching every intermediate (mul, add, activation) for backprop.
            input = X
            forward = [(None, None, input)]
            for i in range(len(self.W)):
                mul = mulGate.forward(self.W[i], input)
                add = addGate.forward(mul, self.b[i])
                input = layer.forward(add)
                forward.append((mul, add, input))

            # Back propagation: start from the gradient of the softmax
            # cross-entropy loss w.r.t. the last layer's output, then walk the
            # cached values backwards through each (tanh, add, multiply) gate.
            dtanh = softmaxOutput.diff(forward[len(forward) - 1][2], y)
            for i in range(len(forward) - 1, 0, -1):
                dadd = layer.backward(forward[i][1], dtanh)
                db, dmul = addGate.backward(forward[i][0], self.b[i - 1], dadd)
                dW, dtanh = mulGate.backward(self.W[i - 1], forward[i - 1][2],
                                             dmul)
                # Add the L2 regularization term (bias terms are not regularized)
                dW += reg_lambda * self.W[i - 1]
                # Gradient descent parameter update
                self.b[i - 1] += -epsilon * db
                self.W[i - 1] += -epsilon * dW

            # write log
            nn_log_instance.w = self.W
            nn_log_instance.b = self.b
            nn_log_instance.forward = forward
            nn_log_instance.write_log()

            if print_loss and epoch % 1000 == 0:
                print("Loss after iteration %i: %f" %
                      (epoch, self.calculate_loss(X, y)))
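
As with the RNN examples, the gate and activation objects used by train() are not shown. Below is a sketch of plausible implementations, inferred only from how they are called (X is a batch of shape (n_samples, n_features), W[i] multiplies on the right, b[i] broadcasts over rows, and Softmax here operates on a whole batch unlike the per-timestep version sketched earlier); treat it as an illustration, not the original project's code.

import numpy as np

class MultiplyGate:
    def forward(self, W, x):
        return np.dot(x, W)                    # (n_samples, n_out)

    def backward(self, W, x, dz):
        dW = np.dot(x.T, dz)                   # gradient w.r.t. the weights
        dx = np.dot(dz, W.T)                   # gradient w.r.t. the layer input
        return dW, dx

class AddGate:
    def forward(self, x, b):
        return x + b                           # b broadcasts over the batch

    def backward(self, x, b, dz):
        db = np.sum(dz, axis=0)                # sum the bias gradient over the batch
        dx = dz
        return db, dx

class Tanh:
    def forward(self, x):
        return np.tanh(x)

    def backward(self, x, top_diff):
        return (1.0 - np.tanh(x) ** 2) * top_diff

class Softmax:
    def predict(self, x):
        e = np.exp(x - np.max(x, axis=1, keepdims=True))
        return e / np.sum(e, axis=1, keepdims=True)

    def diff(self, x, y):
        # Gradient of the cross-entropy loss w.r.t. the scores; y holds one
        # class index per row of x. Averaging over the batch is one common
        # convention and is an assumption here.
        n = x.shape[0]
        probs = self.predict(x)
        probs[np.arange(n), y] -= 1.0
        return probs / n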