(s[0] * s[1] - 3) / 3 if a_desc == 'L' else 0, (s[0] * s[0] - 2) / 2 if a_desc == 'L' else 0, (s[1] * s[1] - 4.5) / 4.5 if a_desc == 'L' else 0, 1 if a_desc == 'L' else 0, s[0] - 1 if a_desc == 'R' else 0, s[1] - 1.5 if a_desc == 'R' else 0, (s[0] * s[1] - 3) / 3 if a_desc == 'R' else 0, (s[0] * s[0] - 2) / 2 if a_desc == 'R' else 0, (s[1] * s[1] - 4.5) / 4.5 if a_desc == 'R' else 0, 1 if a_desc == 'R' else 0, 1 ]) return x_vector learn_tracker = LearnTracker() gridworld = get_gridworld(step_reward=-0.1) NUM_EPISODES = 20000 alpha_obj = Alpha(alpha=0.1) alpha_obj.set_half_life_for_N_episodes(Nepisodes=NUM_EPISODES, alpha_final=0.03333333333333) eps_obj = EpsilonGreedy(epsilon=0.5) eps_obj.set_half_life_for_N_episodes(Nepisodes=NUM_EPISODES, epsilon_final=0.16666666666666) agent = SA_SemiGradAgent(environment=gridworld, update_type='qlearn', sa_linear_function=LazyProgrammerMaze(gridworld), learn_tracker=learn_tracker,
def setUp(self):
    """Prepare a gridworld and a policy before each test.

    Stores the environment returned by ``get_gridworld()`` on
    ``self.gridworld`` and a ``Policy`` built on that environment on
    ``self.P``, then calls the policy's
    ``intialize_policy_to_equiprobable`` with the same environment.
    """
    unittest.TestCase.setUp(self)

    env = get_gridworld()
    self.gridworld = env

    policy = Policy(environment=env)
    # Publish the policy on the test case before initializing it, so the
    # attribute exists even if the initialization call raises.
    self.P = policy
    # The method name is spelled exactly as this call site uses it.
    policy.intialize_policy_to_equiprobable(env=env)
else: print(' (%g to %g) STOCHASTIC'%rwd_ave_obj.get_min_max() ) if sn_count == len(snD): print() print('____'+'_'*len(header)) if __name__ == "__main__": # pragma: no cover from introrl.agent_supt.model import Model from introrl.mdp_data.simple_grid_world import get_gridworld gridworld = get_gridworld() get_sim = Model( gridworld, build_initial_model=True ) # ---------- make a few stochastic to test summ_print #get_sim.define_statesD[s_hash].save_action_results( a_desc, sn_hash, reward_val) # make just the reward stochastic get_sim.define_statesD[(0, 2)].save_action_results( 'R', (0,3), 2.0) # make the action stochastic get_sim.define_statesD[(1,0)].save_action_results( 'U', 'XXX', 0.0) # make both the action and reward stochastic get_sim.define_statesD[(2,2)].save_action_results( 'U', 'XXX', 2.0) get_sim.define_statesD[(2,2)].save_action_results( 'U', 'XXX', 2.2)
def setUp(self):
    """Create the environment used by each test.

    Runs the base ``unittest.TestCase.setUp`` and stores the object
    returned by ``get_gridworld()`` on ``self.ENV``.
    """
    unittest.TestCase.setUp(self)

    environment = get_gridworld()
    self.ENV = environment