Example #1
    def backward(self, x, d, z1, z2, y):
        grad = {}

        W1, W2, W3 = network['W1'], network['W2'], network['W3']
        b1, b2, b3 = network['b1'], network['b2'], network['b3']

        # delta at the output layer (softmax with cross-entropy loss)
        delta3 = functions.d_softmax_with_loss(d, y)
        # gradient of b3
        grad['b3'] = np.sum(delta3, axis=0)
        # gradient of W3
        grad['W3'] = np.dot(z2.T, delta3)
        # derivative of the activation function (sigmoid)
        delta2 = np.dot(delta3, W3.T) * functions.d_sigmoid(z2)
        # gradient of b2
        grad['b2'] = np.sum(delta2, axis=0)
        # gradient of W2
        grad['W2'] = np.dot(z1.T, delta2)
        # derivative of the activation function (sigmoid)
        delta1 = np.dot(delta2, W2.T) * functions.d_sigmoid(z1)
        # gradient of b1
        grad['b1'] = np.sum(delta1, axis=0)
        # gradient of W1
        grad['W1'] = np.dot(x.T, delta1)

        return grad
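For reference, the z1, z2, and y consumed above would come from a matching forward pass. A minimal sketch is shown below; it assumes network holds the weight/bias dict used by backward, that functions provides sigmoid and softmax, and that both hidden layers use sigmoid (consistent with the d_sigmoid calls above). The original notebook's forward pass may differ.

    def forward(self, x):
        # sketch of a forward pass matching backward() above; not the source's exact code
        W1, W2, W3 = network['W1'], network['W2'], network['W3']
        b1, b2, b3 = network['b1'], network['b2'], network['b3']

        u1 = np.dot(x, W1) + b1
        z1 = functions.sigmoid(u1)   # first hidden layer
        u2 = np.dot(z1, W2) + b2
        z2 = functions.sigmoid(u2)   # second hidden layer
        u3 = np.dot(z2, W3) + b3
        y = functions.softmax(u3)    # output layer
        return z1, z2, y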
Example #2
def backward(x, d, z1, y):
    # print("\n##### start backpropagation #####")

    grad = {}

    W1, W2 = network['W1'], network['W2']
    b1, b2 = network['b1'], network['b2']

    # delta at the output layer
    delta2 = functions.d_mean_squared_error(d, y)
    # gradient of b2
    grad['b2'] = np.sum(delta2, axis=0)
    # gradient of W2
    grad['W2'] = np.dot(z1.T, delta2)
    # delta at the hidden layer
    # delta1 = np.dot(delta2, W2.T) * functions.d_relu(z1)

    ## try it out: sigmoid instead of ReLU
    delta1 = np.dot(delta2, W2.T) * functions.d_sigmoid(z1)

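    # promote the 1-D, single-sample arrays (delta1 and x) to row vectors
    # so the outer products below produce 2-D gradient matrices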
    delta1 = delta1[np.newaxis, :]
    # gradient of b1
    grad['b1'] = np.sum(delta1, axis=0)
    x = x[np.newaxis, :]
    # gradient of W1
    grad['W1'] = np.dot(x.T, delta1)

    # print_vec("partial derivative: W1", grad["W1"])
    # print_vec("partial derivative: W2", grad["W2"])
    # print_vec("partial derivative: b1", grad["b1"])
    # print_vec("partial derivative: b2", grad["b2"])

    return grad
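Both backward passes above rely on helpers from the course's functions module. To run the snippets standalone, definitions along the following lines would be consistent with how the helpers are used here; this is a sketch, not necessarily the module's exact code.

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def d_sigmoid(x):
    # derivative of the sigmoid
    s = sigmoid(x)
    return s * (1.0 - s)

def d_mean_squared_error(d, y):
    # gradient of the mean squared error with respect to the prediction y
    batch_size = d.shape[0] if isinstance(d, np.ndarray) else 1
    return (y - d) / batch_size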
Example #3
    def loss_margin_with_gradient(self, x, t):
        features = self.__extract_feature(x)
        l = self.loss(x, t)

        # gradient: per-feature contributions to the raw score w . x
        score = []
        for name, value in features.items():
            score.append(value * self.params[name])
        # derivative of the sigmoid evaluated at the raw score
        dW = d_sigmoid(sum(score))
        for name, value in features.items():
            # partial derivative of t * sigmoid(w . x) with respect to w[name]
            features[name] = t * value * dW

        # margin: label times raw score
        m = sum(score) * t

        return l, m, features
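One possible way to use the returned triple in a training loop is sketched below; model, train_data, and eta are assumed names, and the direction of the update depends on how self.loss is defined in the source.

eta = 0.1  # assumed learning rate
for x, t in train_data:  # t is the label paired with input x
    loss, margin, grads = model.loss_margin_with_gradient(x, t)
    for name, g in grads.items():
        # gradient step on the named feature weights (sign depends on the loss convention)
        model.params[name] -= eta * g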
Example #4
    # loop over the time series
    for t in range(binary_dim):
        # input values at time t
        X = np.array([a_bin[-t - 1], b_bin[-t - 1]]).reshape(1, -1)
        # target data at time t
        dd = np.array([d_bin[binary_dim - t - 1]])

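        # pre-activation: current input through W_in plus the previous
        # hidden state through the recurrent weight matrix W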
        u[:, t + 1] = np.dot(X, W_in) + np.dot(z[:, t].reshape(1, -1), W)
        z[:, t + 1] = functions.sigmoid(u[:, t + 1])
        #         z[:,t+1] = functions.relu(u[:,t+1])
        #         z[:,t+1] = np.tanh(u[:,t+1])
        y[:, t] = functions.sigmoid(np.dot(z[:, t + 1].reshape(1, -1), W_out))

        # error
        loss = functions.least_square(dd, y[:, t])

        delta_out[:, t] = functions.d_least_square(
            dd, y[:, t]) * functions.d_sigmoid(y[:, t])

        all_loss += loss

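        # record the predicted bit (rounded sigmoid output) for this position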
        out_bin[binary_dim - t - 1] = np.round(y[:, t])

    for t in range(binary_dim)[::-1]:
        X = np.array([a_bin[-t - 1], b_bin[-t - 1]]).reshape(1, -1)

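        # BPTT: delta flowing back from the next time step (through W) plus
        # the delta injected by this step's output (through W_out)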
        delta[:, t] = (np.dot(delta[:, t + 1].T, W.T) + np.dot(
            delta_out[:, t].T, W_out.T)) * functions.d_sigmoid(u[:, t + 1])
        #         delta[:,t] = (np.dot(delta[:,t+1].T, W.T) + np.dot(delta_out[:,t].T, W_out.T)) * functions.d_relu(u[:,t+1])
        #         delta[:,t] = (np.dot(delta[:,t+1].T, W.T) + np.dot(delta_out[:,t].T, W_out.T)) * d_tanh(u[:,t+1])

        # accumulate gradients
        W_out_grad += np.dot(z[:, t + 1].reshape(-1, 1),
                             delta_out[:, t].reshape(-1, 1))
        W_grad += np.dot(z[:, t].reshape(-1, 1), delta[:, t].reshape(1, -1))
        W_in_grad += np.dot(X.T, delta[:, t].reshape(1, -1))

    # loop over the time series
    for t in range(binary_dim):
        # input values at time t
        X = np.array([a_bin[-t - 1], b_bin[-t - 1]]).reshape(1, -1)
        # target data at time t
        dd = np.array([d_bin[binary_dim - t - 1]])
        
        u[:,t+1] = np.dot(X, W_in) + np.dot(z[:,t].reshape(1, -1), W)
        z[:,t+1] = functions.sigmoid(u[:,t+1])

        y[:,t] = functions.sigmoid(np.dot(z[:,t+1].reshape(1, -1), W_out))

        # error
        loss = functions.mean_squared_error(dd, y[:,t])
        
        delta_out[:,t] = functions.d_mean_squared_error(dd, y[:,t]) * functions.d_sigmoid(y[:,t])        
        
        all_loss += loss

        out_bin[binary_dim - t - 1] = np.round(y[:,t])

    for t in range(binary_dim)[::-1]:
        X = np.array([a_bin[-t-1],b_bin[-t-1]]).reshape(1, -1)        

        delta[:,t] = (np.dot(delta[:,t+1].T, W.T) + np.dot(delta_out[:,t].T, W_out.T)) * functions.d_sigmoid(u[:,t+1])

        # accumulate gradients
        W_out_grad += np.dot(z[:,t+1].reshape(-1,1), delta_out[:,t].reshape(-1,1))
        W_grad += np.dot(z[:,t].reshape(-1,1), delta[:,t].reshape(1,-1))
        W_in_grad += np.dot(X.T, delta[:,t].reshape(1,-1))

    # loop over the time series (tanh variant of the same forward pass)
    for t in range(binary_dim):
        # input values at time t
        X = np.array([a_bin[-t - 1], b_bin[-t - 1]]).reshape(1, -1)
        # target data at time t
        dd = np.array([d_bin[binary_dim - t - 1]])
        
        u[:,t+1] = np.dot(X, W_in) + np.dot(z[:,t].reshape(1, -1), W)

        # try changing the activation function -> tanh
        # z[:,t+1] = functions.sigmoid(u[:,t+1])
        z[:, t + 1] = np.tanh(u[:, t + 1])
        y[:,t] = functions.sigmoid(np.dot(z[:,t+1].reshape(1, -1), W_out))

        # error
        loss = functions.mean_squared_error(dd, y[:,t])
        
        delta_out[:,t] = functions.d_mean_squared_error(dd, y[:,t]) * functions.d_sigmoid(y[:,t])        
        
        all_loss += loss

        out_bin[binary_dim - t - 1] = np.round(y[:,t])

    for t in range(binary_dim)[::-1]:
        X = np.array([a_bin[-t-1],b_bin[-t-1]]).reshape(1, -1)
        # try changing the activation function -> tanh, d_sigmoid -> d_tanh
        # delta[:,t] = (np.dot(delta[:,t+1].T, W.T) + np.dot(delta_out[:,t].T, W_out.T)) * functions.d_sigmoid(u[:,t+1])
        delta[:,t] = (np.dot(delta[:,t+1].T, W.T) + np.dot(delta_out[:,t].T, W_out.T)) * d_tanh(u[:,t+1])

        # accumulate gradients
        W_out_grad += np.dot(z[:,t+1].reshape(-1,1), delta_out[:,t].reshape(-1,1))
        W_grad += np.dot(z[:,t].reshape(-1,1), delta[:,t].reshape(1,-1))
        W_in_grad += np.dot(X.T, delta[:,t].reshape(1,-1))
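The tanh variant above calls a bare d_tanh, which, unlike the other helpers, is not referenced through the functions module, so the notebook presumably defines it separately. A definition consistent with its use on the pre-activation u would be:

def d_tanh(x):
    # derivative of tanh: 1 - tanh(x)^2
    return 1.0 - np.tanh(x) ** 2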