Python compute_probab_ratiosの例

プログラミング言語: Python

名前空間/パッケージ名: tensor2tensor.trax.rlax.ppo

メソッド/関数: compute_probab_ratios

hotexamples.comのコード掲載数: 2

Python compute_probab_ratios - 2件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのtensor2tensor.trax.rlax.ppo.compute_probab_ratiosの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: ppo_test.py プロジェクト: hubayirp/fabric-vsf

  def test_compute_probab_ratios(self):
    """Checks ppo.compute_probab_ratios against hand-computed ratios.

    Both distributions are handed to the function as log-probabilities. The
    expected matrix holds new_prob / old_prob for the action listed at each
    position, and 0.0 wherever the mask entry is 0.
    """
    # Distributions written as plain probabilities; the log is taken once
    # below. Literal shape is (2, 5, 4), i.e. one more row per outer entry
    # than there are actions -- presumably batch x (T + 1) x num_actions;
    # TODO confirm against ppo.compute_probab_ratios.
    old_probs = np.array([
        [
            [0.1, 0.2, 0.6, 0.1],
            [0.4, 0.1, 0.4, 0.1],
            [0.3, 0.1, 0.5, 0.1],
            [0.1, 0.2, 0.6, 0.1],
            [0.3, 0.1, 0.5, 0.1],
        ],
        [
            [0.3, 0.1, 0.5, 0.1],
            [0.1, 0.1, 0.4, 0.4],
            [0.3, 0.1, 0.5, 0.1],
            [0.1, 0.2, 0.6, 0.1],
            [0.3, 0.1, 0.5, 0.1],
        ],
    ])
    new_probs = np.array([
        [
            [0.3, 0.1, 0.5, 0.1],
            [0.4, 0.1, 0.1, 0.3],
            [0.1, 0.2, 0.1, 0.6],
            [0.3, 0.1, 0.5, 0.1],
            [0.1, 0.2, 0.1, 0.6],
        ],
        [
            [0.1, 0.2, 0.1, 0.6],
            [0.1, 0.1, 0.2, 0.6],
            [0.3, 0.1, 0.3, 0.3],
            [0.1, 0.2, 0.1, 0.6],
            [0.1, 0.2, 0.1, 0.6],
        ],
    ])
    log_p_old = np.log(old_probs)
    log_p_new = np.log(new_probs)

    # Index of the action picked at each position; shape (2, 4).
    chosen_actions = np.array([[1, 2, 0, 1], [0, 3, 3, 0]])
    # 1 marks positions that count, 0 marks positions expected to yield 0.0.
    valid_mask = np.array([[1, 1, 0, 0], [1, 1, 1, 0]])

    # New distribution goes first, old distribution second.
    actual_ratios = ppo.compute_probab_ratios(
        log_p_new, log_p_old, chosen_actions, valid_mask)

    # Each non-zero entry is new / old for the chosen action at that position.
    expected_ratios = np.array([
        [0.1 / 0.2, 0.1 / 0.4, 0.0, 0.0],
        [0.1 / 0.3, 0.6 / 0.4, 0.3 / 0.1, 0.0],
    ])
    self.assertAllClose(expected_ratios, actual_ratios)

コード例 #2

ファイルを表示

ファイル: ppo_test.py プロジェクト: zwcdp/tensor2tensor

    def test_compute_probab_ratios(self):
        """Checks ppo.compute_probab_ratios against hand-computed ratios.

        The distributions are passed as plain probabilities. The expected
        matrix holds new_prob / old_prob for the action listed at each
        position, and 0.0 wherever the mask entry is 0.
        """
        # Literal shape is (2, 4, 4) -- presumably batch x time x num_actions;
        # TODO confirm against ppo.compute_probab_ratios.
        old_probs = np.array([
            [
                [0.1, 0.2, 0.6, 0.1],
                [0.4, 0.1, 0.4, 0.1],
                [0.3, 0.1, 0.5, 0.1],
                [0.1, 0.2, 0.6, 0.1],
            ],
            [
                [0.3, 0.1, 0.5, 0.1],
                [0.1, 0.1, 0.4, 0.4],
                [0.3, 0.1, 0.5, 0.1],
                [0.1, 0.2, 0.6, 0.1],
            ],
        ])
        new_probs = np.array([
            [
                [0.3, 0.1, 0.5, 0.1],
                [0.4, 0.1, 0.1, 0.3],
                [0.1, 0.2, 0.1, 0.6],
                [0.3, 0.1, 0.5, 0.1],
            ],
            [
                [0.1, 0.2, 0.1, 0.6],
                [0.1, 0.1, 0.2, 0.6],
                [0.3, 0.1, 0.3, 0.3],
                [0.1, 0.2, 0.1, 0.6],
            ],
        ])

        # Index of the action picked at each position; shape (2, 4).
        chosen_actions = np.array([[1, 2, 0, 1], [0, 3, 3, 0]])
        # 1 marks positions that count, 0 marks positions expected to yield 0.0.
        valid_mask = np.array([[1, 1, 0, 0], [1, 1, 1, 0]])

        # NOTE(review): the old distribution is passed first here, while the
        # expected values below are new / old -- confirm this matches the
        # parameter order of the ppo version under test.
        actual_ratios = ppo.compute_probab_ratios(
            old_probs, new_probs, chosen_actions, valid_mask)

        expected_ratios = np.array([
            [0.1 / 0.2, 0.1 / 0.4, 0.0, 0.0],
            [0.1 / 0.3, 0.6 / 0.4, 0.3 / 0.1, 0.0],
        ])
        self.assertAllClose(expected_ratios, actual_ratios)