Python compute_probab_ratios Exemples

Langage de programmation: Python

Espace de noms/Paquet : tensor2tensor.trax.rlax.ppo

Méthode/Fonction: compute_probab_ratios

Exemples sur hotexamples.com : 2

Python compute_probab_ratios - 2 exemples trouvés. Ce sont les exemples réels les mieux notés de tensor2tensor.trax.rlax.ppo.compute_probab_ratios extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Exemple #1

0

Afficher le fichier

Fichier : ppo_test.py Projet : hubayirp/fabric-vsf

def test_compute_probab_ratios(self):
    """Check `ppo.compute_probab_ratios` on a hand-built two-batch example.

    Both policy inputs are arrays of shape (2, 5, 4) whose entries are
    `np.log` of the probabilities listed below; `actions` and `mask` have
    shape (2, 4). The expected result is, for every (batch, step) where the
    mask is 1, the new probability of the chosen action divided by the old
    one, and 0.0 where the mask is 0.

    NOTE(review): the axes look like (batch, time, action) and the fifth row
    of every batch never shows up in the expected output -- presumably an
    extra trailing time step; confirm against `ppo.compute_probab_ratios`.
    """

    def log_prob_array(batches):
        # np.log is applied to each scalar individually before the array is
        # assembled, exactly like writing np.log(p) for every entry.
        return np.array(
            [[[np.log(p) for p in row] for row in batch] for batch in batches]
        )

    old_log_probs = log_prob_array([
        [
            [0.1, 0.2, 0.6, 0.1],
            [0.4, 0.1, 0.4, 0.1],
            [0.3, 0.1, 0.5, 0.1],
            [0.1, 0.2, 0.6, 0.1],
            [0.3, 0.1, 0.5, 0.1],
        ],
        [
            [0.3, 0.1, 0.5, 0.1],
            [0.1, 0.1, 0.4, 0.4],
            [0.3, 0.1, 0.5, 0.1],
            [0.1, 0.2, 0.6, 0.1],
            [0.3, 0.1, 0.5, 0.1],
        ],
    ])

    new_log_probs = log_prob_array([
        [
            [0.3, 0.1, 0.5, 0.1],
            [0.4, 0.1, 0.1, 0.3],
            [0.1, 0.2, 0.1, 0.6],
            [0.3, 0.1, 0.5, 0.1],
            [0.1, 0.2, 0.1, 0.6],
        ],
        [
            [0.1, 0.2, 0.1, 0.6],
            [0.1, 0.1, 0.2, 0.6],
            [0.3, 0.1, 0.3, 0.3],
            [0.1, 0.2, 0.1, 0.6],
            [0.1, 0.2, 0.1, 0.6],
        ],
    ])

    chosen_actions = np.array([[1, 2, 0, 1], [0, 3, 3, 0]])
    step_mask = np.array([[1, 1, 0, 0], [1, 1, 1, 0]])

    # Each non-zero entry is new_prob / old_prob for the chosen action,
    # e.g. batch 0, step 0, action 1: 0.1 (new) / 0.2 (old).
    expected_ratios = np.array([
        [0.1 / 0.2, 0.1 / 0.4, 0.0, 0.0],
        [0.1 / 0.3, 0.6 / 0.4, 0.3 / 0.1, 0.0],
    ])

    # The new log-probabilities are passed first, the old ones second.
    actual_ratios = ppo.compute_probab_ratios(
        new_log_probs, old_log_probs, chosen_actions, step_mask
    )

    self.assertAllClose(expected_ratios, actual_ratios)

Exemple #2

0

Afficher le fichier

Fichier : ppo_test.py Projet : zwcdp/tensor2tensor

def test_compute_probab_ratios(self):
    """Check `ppo.compute_probab_ratios` on a hand-built two-batch example.

    The policy inputs are plain probabilities (no logarithm applied) in
    arrays of shape (2, 4, 4); `actions` and `mask` have shape (2, 4). The
    expected result is, wherever the mask is 1, the new probability of the
    chosen action divided by the old one, and 0.0 wherever the mask is 0.

    NOTE(review): the old probabilities are passed as the first argument and
    the new ones as the second; confirm this matches the signature of the
    `ppo` version this test runs against.
    """
    # Old-policy probabilities, one table per batch entry.
    old_first = [
        [0.1, 0.2, 0.6, 0.1],
        [0.4, 0.1, 0.4, 0.1],
        [0.3, 0.1, 0.5, 0.1],
        [0.1, 0.2, 0.6, 0.1],
    ]
    old_second = [
        [0.3, 0.1, 0.5, 0.1],
        [0.1, 0.1, 0.4, 0.4],
        [0.3, 0.1, 0.5, 0.1],
        [0.1, 0.2, 0.6, 0.1],
    ]
    old_probs = np.array([old_first, old_second])

    # New-policy probabilities, laid out the same way.
    new_first = [
        [0.3, 0.1, 0.5, 0.1],
        [0.4, 0.1, 0.1, 0.3],
        [0.1, 0.2, 0.1, 0.6],
        [0.3, 0.1, 0.5, 0.1],
    ]
    new_second = [
        [0.1, 0.2, 0.1, 0.6],
        [0.1, 0.1, 0.2, 0.6],
        [0.3, 0.1, 0.3, 0.3],
        [0.1, 0.2, 0.1, 0.6],
    ]
    new_probs = np.array([new_first, new_second])

    chosen_actions = np.array([[1, 2, 0, 1], [0, 3, 3, 0]])
    step_mask = np.array([[1, 1, 0, 0], [1, 1, 1, 0]])

    # Each non-zero entry is new_prob / old_prob for the chosen action,
    # e.g. batch 1, step 2, action 3: 0.3 (new) / 0.1 (old).
    expected_ratios = np.array([
        [0.1 / 0.2, 0.1 / 0.4, 0.0, 0.0],
        [0.1 / 0.3, 0.6 / 0.4, 0.3 / 0.1, 0.0],
    ])

    actual_ratios = ppo.compute_probab_ratios(
        old_probs, new_probs, chosen_actions, step_mask
    )

    self.assertAllClose(expected_ratios, actual_ratios)