Code example #1
    def bptt(self, x, y):
        # The total number of time steps
        t_steps = len(x)
        self.null_deltas()
        f, i, o, c, c_curr, h, y_ = self.forward(x)

        # subtract 1 at the correct word's index, since its target probability is 1
        delta_y_ = np.copy(y_)  # copy so the forward output is not mutated in place
        delta_y_[np.arange(len(y)), y] -= 1.
        delta_h = np.zeros(h.shape)
        delta_c = np.zeros(c.shape)
        delta_f = np.zeros(f.shape)
        delta_i = np.zeros(i.shape)
        delta_o = np.zeros(o.shape)
        delta_c_curr = np.zeros(c_curr.shape)

        # For each output backwards...
        for t in np.arange(t_steps)[::-1]:
            # one hot encoding
            x_t = np.zeros((self.word_dim, 1))
            x_t[x[t]] = 1

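            # assumption: the forward pass allocates h, c, and the gate
            # arrays with t_steps + 1 rows (a trailing zero row for the
            # initial state), so delta_h[t + 1] and h[t - 1] stay in
            # bounds; numpy's index -1 picks up that extra row at t = 0.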
            delta_h[t] = np.dot(self.w_v.T, delta_y_[t]) + delta_h[t + 1]
            delta_c[t] = delta_c[t + 1] * f[t + 1] + delta_h[t] * o[t] * dtanh(
                c[t])
            delta_f[t] = delta_c[t] * c[t - 1] * dsigmoid(f[t])
            delta_i[t] = delta_c[t] * c_curr[t] * dsigmoid(i[t])
            delta_o[t] = delta_h[t] * dsigmoid(o[t]) * np.tanh(c[t])
            delta_c_curr[t] += delta_c[t] * i[t] * dtanh(c_curr[t])

            # W_v, b_v
            self.dLdWv += np.outer(delta_y_[t], h[t])
            self.dLdBv += delta_y_[t]

            # W_fx, W_fh, b_f
            self.dLdWfx += np.dot(delta_f[t], x_t.T)
            self.dLdWfh += np.dot(delta_f[t], h[t - 1].T)
            self.dLdBf += delta_f[t]

            # W_ix, W_ih, b_i
            self.dLdWix += np.dot(delta_i[t], x_t.T)
            self.dLdWih += np.dot(delta_i[t], h[t - 1].T)
            self.dLdBi += delta_i[t]

            # W_cx, W_ch, b_c
            self.dLdWcx += np.dot(delta_c_curr[t], x_t.T)
            self.dLdWch += np.dot(delta_c_curr[t], h[t - 1].T)
            self.dLdBc += delta_c_curr[t]

            # W_ox, W_oh, b_o
            self.dLdWox += np.dot(delta_o[t], x_t.T)
            self.dLdWoh += np.dot(delta_o[t], h[t - 1].T)
            self.dLdBo += delta_o[t]

        self.clip_gradients()
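
Example #1 calls dsigmoid and dtanh without defining them. The sketch below is consistent with how they are used above (dsigmoid receives already-activated gate values such as f[t], while dtanh receives the raw cell state c[t], note the explicit np.tanh(c[t]) on the output-gate line); the real helpers are not part of the snippet, so this is an assumption:

    def dsigmoid(y):
        # derivative of the sigmoid, written in terms of its output y
        return y * (1.0 - y)

    def dtanh(x):
        # derivative of tanh at the raw input x
        return 1.0 - np.tanh(x) ** 2
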
Code example #2
    def _backwards(self, X, y, i):
        # output-layer error: target minus prediction, scaled by the
        # derivative of the logistic (sigmoid) activation of the output.
        # hidden-layer error: the output delta propagated back through w2,
        # scaled by the sigmoid derivative of the hidden activations.
        # weights and biases are then nudged along these deltas.
        hidden, output = self._forward(X)
        d_o = y - output
        if (i % 10000) == 0:
            print(np.mean(np.abs(d_o)))
        d_o = d_o * utils.dsigmoid(output)
        d_h = np.dot(d_o, self.w2.T) * utils.dsigmoid(hidden)
        self.w2 += self.learning_rate * np.dot(hidden.T, d_o)
        self.b2 += self.learning_rate * np.sum(d_o, axis=0, keepdims=True)
        self.w += self.learning_rate * np.dot(X.T, d_h)
        self.b += self.learning_rate * np.sum(d_h, axis=0, keepdims=True)
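
The backward step in example #2 assumes a two-layer forward pass. The sketch below is reconstructed from the shapes used above (X as a batch of row vectors, w2 of shape hidden-by-output); the method name _forward comes from the call in the snippet, but the body, and the existence of utils.sigmoid alongside utils.dsigmoid, are assumptions:

    def _forward(self, X):
        # hidden and output activations, both squashed by the logistic
        # sigmoid; inputs are row vectors, so weights multiply on the right
        hidden = utils.sigmoid(np.dot(X, self.w) + self.b)
        output = utils.sigmoid(np.dot(hidden, self.w2) + self.b2)
        return hidden, output
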
Code example #3
    def backward(self, target, dh_next, dC_next, C_prev, z, f, i, C_bar, C, o,
                 h, v, y):
        # cross-entropy gradient through the softmax output: copy the
        # probabilities and subtract 1 at the target index
        dv = np.copy(y)
        dv[target] -= 1

        self.W_v.d += np.dot(dv, h.T)
        self.b_v.d += dv

        dh = np.dot(self.W_v.v.T, dv)
        dh += dh_next
        do = dh * utils.tanh(C)
        do = utils.dsigmoid(o) * do
        self.W_o.d += np.dot(do, z.T)
        self.b_o.d += do

        dC = np.copy(dC_next)
        dC += dh * o * utils.dtanh(utils.tanh(C))
        dC_bar = dC * i
        dC_bar = utils.dtanh(C_bar) * dC_bar
        self.W_C.d += np.dot(dC_bar, z.T)
        self.b_C.d += dC_bar

        di = dC * C_bar
        di = utils.dsigmoid(i) * di
        self.W_i.d += np.dot(di, z.T)
        self.b_i.d += di

        df = dC * C_prev
        df = utils.dsigmoid(f) * df
        self.W_f.d += np.dot(df, z.T)
        self.b_f.d += df

        dz = (np.dot(self.W_f.v.T, df) + np.dot(self.W_i.v.T, di) +
              np.dot(self.W_C.v.T, dC_bar) + np.dot(self.W_o.v.T, do))
        dh_prev = dz[:self.h_size, :]
        dC_prev = f * dC

        return dh_prev, dC_prev
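
The long argument list of backward in example #3 mirrors what a matching forward step would cache. The sketch below is reconstructed from the backward math (z is the input stacked under the previous hidden state, as dz[:self.h_size] implies, and y is a softmax, as dv[target] -= 1 implies); utils.sigmoid and the exact method shape are assumptions:

    def forward(self, x, h_prev, C_prev):
        # stack the previous hidden state on top of the input, matching
        # dh_prev = dz[:self.h_size, :] in backward above
        z = np.vstack((h_prev, x))
        f = utils.sigmoid(np.dot(self.W_f.v, z) + self.b_f.v)
        i = utils.sigmoid(np.dot(self.W_i.v, z) + self.b_i.v)
        C_bar = utils.tanh(np.dot(self.W_C.v, z) + self.b_C.v)
        C = f * C_prev + i * C_bar              # new cell state
        o = utils.sigmoid(np.dot(self.W_o.v, z) + self.b_o.v)
        h = o * utils.tanh(C)                   # new hidden state
        v = np.dot(self.W_v.v, h) + self.b_v.v
        y = np.exp(v) / np.sum(np.exp(v))       # softmax output
        return z, f, i, C_bar, C, o, h, v, y
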
Code example #4
File: NN.py Project: aglabassi/my-neural-net
    def _compute_dJdis(self, acts, y):

        dJdis = [0] * self.nlayers  # no error in the input layer, so dJdis[0] stays 0
        dJdis[-1] = acts[-1][1:] - y

        for l in range(self.nlayers - 2, 0, -1):

            # Derivative of the error with respect to the output of the
            # current layer, computed from dJdi of the next layer by
            # backpropagating through the weighted arcs; the bias row is
            # dropped ([1:]) because bias units have no incoming arcs.
            dJdo = np.dot(self.W[l].T, dJdis[l + 1]).T[1:]

            # Derivative of the output with respect to the inputs of the
            # current layer.
            dodi = dsigmoid(np.dot(self.W[l - 1], acts[l - 1]))

            dJdis[l] = dJdo * dodi

        return dJdis
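
The deltas from _compute_dJdis still have to be combined with the activations to form weight gradients. A minimal sketch of that step, assuming acts[l] holds the bias-augmented activation vector of layer l (the [1:] slices above imply a leading bias entry) and using a hypothetical method name:

    def _compute_dJdW(self, acts, dJdis):
        # outer product of each layer's delta with the previous layer's
        # (bias-augmented) activations gives the gradient for W[l]
        return [np.outer(dJdis[l + 1], acts[l])
                for l in range(self.nlayers - 1)]
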
Code example #5
    def calculate_errors(self, correct, outputs):
        errors = [[0 for neuron in range(0, self.neurons_per_layer[layer])]
                  for layer in range(0, self.layers)]

        # calculate output layer errors
        for i in range(0, len(outputs[-1])):
            o = outputs[-1][i]
            errors[-1][i] = u.dsigmoid(o) * (correct[i] - o)

        # calculate hidden layer errors
        for layer in reversed(range(0, self.layers - 1)):
            for neuron in range(0, self.neurons_per_layer[layer]):
                neuron_output = outputs[layer][neuron]

                error_caused = 0
                for i in range(0, self.neurons_per_layer[layer + 1]):
                    unit = self.model[layer + 1][i]
                    error_caused += unit.W[0][neuron] \
                                    * errors[layer+1][i] \
                                    * unit.d_func(neuron_output)

                errors[layer][neuron] = error_caused

        return errors
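
calculate_errors only produces the per-neuron deltas; applying them is a separate step. A rough sketch, assuming each unit in self.model keeps its incoming weights in unit.W[0] (as the access above suggests) and that self.learning_rate exists; the method name and the inputs argument are both hypothetical:

    def apply_errors(self, errors, outputs, inputs):
        # nudge every incoming weight by learning_rate * delta * upstream activation
        for layer in range(self.layers):
            upstream = inputs if layer == 0 else outputs[layer - 1]
            for n in range(self.neurons_per_layer[layer]):
                unit = self.model[layer][n]
                for j in range(len(upstream)):
                    unit.W[0][j] += self.learning_rate * errors[layer][n] * upstream[j]
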
Code example #6
    def error(self, wt, error, z):
        # propagate an error one layer back: transposed weights times the
        # downstream error, gated by the sigmoid derivative at z
        return np.multiply(np.matmul(wt.transpose(), error), utils.dsigmoid(z))
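
A hypothetical call, with shapes chosen purely for illustration (net stands in for whatever object owns the method): wt maps a 4-unit layer to a 3-unit layer, so a 3-vector of downstream error folds back into a 4-vector:

    import numpy as np

    wt = np.random.randn(3, 4)            # weights from a 4-unit to a 3-unit layer
    downstream = np.random.randn(3, 1)    # error at the 3-unit layer
    z = np.random.randn(4, 1)             # values of the 4-unit layer fed to dsigmoid
    upstream = net.error(wt, downstream, z)  # shape (4, 1)
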
Code example #7
    def result_error(self, correct, inputs):
        # feed_forward returns the pre-activations of the two hidden layers
        # (flz, slz) and of the result layer (rz)
        flz, slz, rz = self.feed_forward(inputs)
        cost = utils.ncost(correct, utils.sigmoid(rz))
        # self.learning_speed = cost
        # output delta: cost derivative times the sigmoid derivative at rz
        return np.multiply(utils.dcost(correct, utils.sigmoid(rz)), utils.dsigmoid(rz))