def _value_iteration_slow(self, *, max_steps=100, delay=0.25):
    """Run value iteration one sweep at a time, redrawing after each sweep.

    Each step calls ``value_iteration`` with ``num_iter=1``, extracts a
    policy from the result, pushes both to the grid window and to the MDP,
    then pauses so the intermediate state is visible on screen.  The loop
    stops early as soon as ``values_converged`` reports that the values of
    two consecutive steps match.  A dialog is shown at the end.

    Args:
        max_steps: Upper bound on the number of single-sweep steps.
        delay: Seconds to pause after each redraw.
    """
    # Snapshot the starting values; copied so later updates to the MDP
    # cannot alias the baseline used for the convergence comparison.
    previous = dict(self.mdp.values)

    for step in range(1, max_steps + 1):
        values = value_iteration(self.mdp.values, self.mdp, num_iter=1)
        policy = policy_extraction(values, self.mdp)

        self.gridworldwindow.update_grid(values, policy)
        self.mdp.update_values(values)
        self.mdp.update_policies(policy)

        # Redraw before and after the pause (presumably a Tk window whose
        # event loop is blocked while this method runs -- TODO confirm).
        self.gridworldwindow.window.update()
        time.sleep(delay)
        self.gridworldwindow.window.update()

        current = dict(values)
        if values_converged(current, previous):
            message = 'Value Iteration has converged in {} steps!'.format(step)
            break
        previous = current
    else:
        # The step budget ran out without the convergence check passing;
        # do not claim convergence in that case.
        message = 'Value Iteration has not converged after {} steps.'.format(max_steps)

    self.gridworldwindow.show_dialog(message)
def _value_iteration_100_steps(self):
    """Run 100 iterations of value iteration in one go and display the result.

    Calls ``value_iteration`` with ``num_iter=100`` on the MDP's current
    values, extracts a policy from the result, then updates the grid window
    and stores the new values and policy on the MDP.
    """
    values = value_iteration(self.mdp.values, self.mdp, num_iter=100)
    policy = policy_extraction(values, self.mdp)

    # NOTE(review): uses ``self.gridworldwindow`` and ``update_policies`` to
    # match the other value-iteration methods in this class; the previous
    # ``self.gridworld`` / ``update_policy`` spellings appeared nowhere else.
    self.gridworldwindow.update_grid(values, policy)
    self.mdp.update_values(values)
    self.mdp.update_policies(policy)
def _value_iteration_1_step(self):
    """Advance value iteration by a single sweep and display the result.

    Calls ``value_iteration`` with ``num_iter=1`` on the MDP's current
    values, extracts a policy from the result, redraws the grid window with
    both, and then stores them on the MDP.
    """
    mdp = self.mdp

    # One sweep starting from whatever values the MDP currently holds.
    next_values = value_iteration(mdp.values, mdp, num_iter=1)
    next_policy = policy_extraction(next_values, mdp)

    # Redraw first, then persist, in the same order as before.
    self.gridworldwindow.update_grid(next_values, next_policy)
    mdp.update_values(next_values)
    mdp.update_policies(next_policy)