def set_mu_from_states(self, s, seed=1, n_samples_eval=6000):
    """Recompute the ``mu*`` sample attributes from the given states.

    Passes the first ``n_samples_eval`` rows of ``s`` to
    ``mdp.samples_distribution_from_states`` together with this object's
    ``mdp``, ``target_policy``, ``phi`` and ``mu_n_next``, and stores the
    five returned values in ``self.mu``, ``self.mu_r``, ``self.mu_next``,
    ``self.mu_phi`` and ``self.mu_phi_next`` (in that order).

    :param s: states; indexed as ``s[:n_samples_eval, :]``, so it must
        support two-axis slicing (e.g. a 2-D array).
    :param seed: forwarded unchanged as ``seed`` to
        ``mdp.samples_distribution_from_states``.
    :param n_samples_eval: upper bound on the number of leading rows of
        ``s`` that are used.
    """
    # Drop ``Pi`` if it exists before ``mu`` is replaced.
    # NOTE(review): presumably ``Pi`` is a cached quantity derived from the
    # old ``mu`` -- confirm against the rest of the class.
    if hasattr(self, "Pi"):
        del self.Pi

    (self.mu, self.mu_r, self.mu_next,
     self.mu_phi, self.mu_phi_next) = mdp.samples_distribution_from_states(
        self.mdp,
        policy=self.target_policy,
        phi=self.phi,
        states=s[:n_samples_eval, :],
        n_next=self.mu_n_next,
        seed=seed)

    # Parenthesised single-argument form: prints the same text under the
    # Python 2 print statement and is valid syntax on Python 3.
    # NOTE(review): the text mentions "trajectory samples" although this
    # method receives the states directly -- possibly copy-pasted; the
    # message is left unchanged.
    print("Mu set to trajectory samples")
def set_mu_from_trajectory(self, n_samples=1000, n_eps=1, verbose=0, seed=1,
                           n_samples_eval=6000):
    """Recompute the ``mu*`` sample attributes from a sampled trajectory.

    First obtains states from ``self.mdp.samples_cached`` using
    ``self.behavior_policy``. It then passes the first ``n_samples_eval``
    rows of those states to ``mdp.samples_distribution_from_states``
    together with ``self.target_policy``, ``self.phi`` and
    ``self.mu_n_next``. The five returned values are stored in
    ``self.mu``, ``self.mu_r``, ``self.mu_next``, ``self.mu_phi`` and
    ``self.mu_phi_next`` (in that order).

    :param n_samples: forwarded as ``n_iter`` to ``samples_cached``.
    :param n_eps: forwarded as ``n_restarts`` to ``samples_cached``.
    :param verbose: forwarded as ``verbose`` to ``samples_cached``.
    :param seed: forwarded as ``seed`` to ``samples_cached`` only; the
        distribution call uses ``self.mu_seed`` instead.
    :param n_samples_eval: upper bound on the number of leading rows of
        the sampled states that are used.
    """
    # Only the first element (the states) of the 5-tuple is needed; the
    # remaining four are discarded.
    states, _, _, _, _ = self.mdp.samples_cached(
        n_iter=n_samples,
        n_restarts=n_eps,
        policy=self.behavior_policy,
        seed=seed,
        verbose=verbose)

    # Drop ``Pi`` if it exists before ``mu`` is replaced.
    # NOTE(review): presumably ``Pi`` is a cached quantity derived from the
    # old ``mu`` -- confirm against the rest of the class.
    if hasattr(self, "Pi"):
        del self.Pi

    (self.mu, self.mu_r, self.mu_next,
     self.mu_phi, self.mu_phi_next) = mdp.samples_distribution_from_states(
        self.mdp,
        policy=self.target_policy,
        phi=self.phi,
        states=states[:n_samples_eval, :],
        n_next=self.mu_n_next,
        seed=self.mu_seed)

    # Parenthesised single-argument form: prints the same text under the
    # Python 2 print statement and is valid syntax on Python 3.
    print("Mu set to trajectory samples")