Esempio n. 1
0
                (s[0] * s[1] - 3) / 3 if a_desc == 'L' else 0,
                (s[0] * s[0] - 2) / 2 if a_desc == 'L' else 0,
                (s[1] * s[1] - 4.5) / 4.5 if a_desc == 'L' else 0,
                1 if a_desc == 'L' else 0, s[0] -
                1 if a_desc == 'R' else 0, s[1] - 1.5 if a_desc == 'R' else 0,
                (s[0] * s[1] - 3) / 3 if a_desc == 'R' else 0,
                (s[0] * s[0] - 2) / 2 if a_desc == 'R' else 0,
                (s[1] * s[1] - 4.5) / 4.5 if a_desc == 'R' else 0,
                1 if a_desc == 'R' else 0, 1
            ])

        return x_vector


# Script-level setup for a learning run: a tracker, the gridworld
# environment, and the alpha / epsilon objects with their schedules.
# learn_tracker and gridworld are passed to the agent constructed below.
learn_tracker = LearnTracker()
# step_reward=-0.1 is forwarded to the gridworld factory
# (presumably the reward for each ordinary step -- confirm in get_gridworld).
gridworld = get_gridworld(step_reward=-0.1)

# Episode count handed to both set_half_life_for_N_episodes calls.
NUM_EPISODES = 20000

# Alpha starts at 0.1; the requested final value is about 0.1 / 3.
# NOTE(review): the decay shape is defined inside
# Alpha.set_half_life_for_N_episodes, which is not visible here.
alpha_obj = Alpha(alpha=0.1)
alpha_obj.set_half_life_for_N_episodes(Nepisodes=NUM_EPISODES,
                                       alpha_final=0.03333333333333)

# Epsilon starts at 0.5; the requested final value is about 0.5 / 3,
# i.e. the same start-to-final ratio as alpha above.
eps_obj = EpsilonGreedy(epsilon=0.5)
eps_obj.set_half_life_for_N_episodes(Nepisodes=NUM_EPISODES,
                                     epsilon_final=0.16666666666666)

agent = SA_SemiGradAgent(environment=gridworld,
                         update_type='qlearn',
                         sa_linear_function=LazyProgrammerMaze(gridworld),
                         learn_tracker=learn_tracker,
Esempio n. 2
0
 def setUp(self):
     """Build the per-test fixtures.

     Stores a gridworld from get_gridworld() as ``self.gridworld`` and a
     Policy over that environment as ``self.P``, then initialises the
     policy through ``intialize_policy_to_equiprobable``.
     """
     unittest.TestCase.setUp(self)

     env = get_gridworld()
     self.gridworld = env

     policy = Policy(environment=env)
     self.P = policy

     # The method name is spelled exactly as the call site has it
     # ("intialize"); check the Policy API before "correcting" it.
     policy.intialize_policy_to_equiprobable(env=env)
Esempio n. 3
0
                    else:
                        print('  (%g to %g) STOCHASTIC'%rwd_ave_obj.get_min_max() )
                    
                    if sn_count == len(snD):
                        print()
                    
                    
        print('____'+'_'*len(header))
                

if __name__ == "__main__": # pragma: no cover
    # Manual demo: build a Model from the simple gridworld, then record a few
    # extra action results so that some entries are no longer deterministic.
    
    from introrl.agent_supt.model import Model
    from introrl.mdp_data.simple_grid_world import get_gridworld
    
    gridworld = get_gridworld()
    
    # NOTE(review): despite the name, get_sim holds a Model instance.
    get_sim = Model( gridworld, build_initial_model=True )
    
    # ---------- make a few stochastic to test summ_print
    # Call pattern used by the statements below:
    #   get_sim.define_statesD[s_hash].save_action_results( a_desc, sn_hash, reward_val)
    
    # make just the reward stochastic:
    # record action 'R' from (0, 2) to (0, 3) with reward 2.0
    # (presumably the initial model already holds this transition with a
    # different reward -- confirm).
    get_sim.define_statesD[(0, 2)].save_action_results( 'R', (0,3), 2.0)
    
    # make the action stochastic:
    # record 'XXX' as a next state for action 'U' from (1, 0) with reward 0.0
    # ('XXX' looks like a placeholder state hash -- confirm).
    get_sim.define_statesD[(1,0)].save_action_results( 'U', 'XXX', 0.0)
    
    # make both the action and reward stochastic:
    # two results for action 'U' from (2, 2) to 'XXX', with rewards 2.0 and 2.2.
    get_sim.define_statesD[(2,2)].save_action_results( 'U', 'XXX', 2.0)
    get_sim.define_statesD[(2,2)].save_action_results( 'U', 'XXX', 2.2)
Esempio n. 4
0
 def setUp(self):
     """Run the base TestCase setUp and store a gridworld as ``self.ENV``."""
     unittest.TestCase.setUp(self)
     # Environment instance obtained from get_gridworld() with default arguments.
     self.ENV = get_gridworld()