def test_run(self):
    """Smoke-test LiveExperiment.run with one TD agent and no callbacks."""
    # Environment plus a uniformly-random policy over its discrete actions.
    environment = gym.make('MountainCar-v0')
    policy = vcf.DiscreteRandomControl(environment.action_space.n)

    # Minimal agent: a bias-only feature vector driving a TD learner.
    features = vcf.BiasUnit()
    learner = vcf.TD(len(features))
    hyperparams = dict(alpha=0.01, gm=0.999, gm_p=0.999, lm=0.1)
    agents = [vcf.Agent(learner, features, hyperparams)]

    experiment = vcf.LiveExperiment(environment, policy, agents)

    # A short run (10 episodes, 10 steps each) exercises the code path.
    experiment.run(10, 10, callbacks=[])
def test_run_with_callbacks(self):
    """Run a LiveExperiment with a recording callback and verify every hook fired."""
    # Environment plus a uniformly-random policy over its discrete actions.
    environment = gym.make('MountainCar-v0')
    policy = vcf.DiscreteRandomControl(environment.action_space.n)

    # Minimal agent: a bias-only feature vector driving a TD learner.
    features = vcf.BiasUnit()
    learner = vcf.TD(len(features))
    hyperparams = dict(alpha=0.01, gm=0.999, gm_p=0.999, lm=0.1)
    agents = [vcf.Agent(learner, features, hyperparams)]

    experiment = vcf.LiveExperiment(environment, policy, agents)

    # _CheckCallback records how many times each experiment hook was invoked.
    recorder = _CheckCallback()
    experiment.run(10, 10, callbacks=[recorder])

    # Every hook must have been invoked at least once during the run.
    for hook_count in (recorder.experiment_begin, recorder.experiment_end,
                       recorder.episode_begin, recorder.episode_end,
                       recorder.step_begin, recorder.step_end):
        assert hook_count > 0
# NOTE(review): this chunk continues a setup that begins outside the visible
# file region -- `tiling_1`, `env`, and `na` are presumably defined just above;
# confirm upstream before editing.
# Two more binary tilings over the observation space at different resolutions.
tiling_2 = vcf.features.BinaryTiling(env.observation_space, 19)
tiling_3 = vcf.features.BinaryTiling(env.observation_space, 31)
# Concatenate binary vectors
phi = vcf.Union(tiling_1, tiling_2, tiling_3)
# Define the control (discrete actions Q-learning)
dq = vcf.DiscreteQ(len(phi), na, epsilon=0.002)
dq_params = {
    # step size: presumably decays per-episode as a power law -- TODO confirm
    'alpha' : vcf.parameters.EpisodicPowerLaw(0.2, 0.25),
    'gm' : 0.9999,
    'gm_p' : vcf.Constant(0.9999, 0),
    'lm' : vcf.Constant(0.5, 0),
}
control = vcf.Agent(dq, phi, dq_params)
# List of agents to update
learners = [control]
# Set up the experiment
experiment = vcf.LiveExperiment(env, control, learners=learners)
# Set up callbacks
hist_cbk = vcf.callbacks.History()
cbk_lst = [
    vcf.callbacks.Progress(),
    hist_cbk,
]
# Run the experiment (150 episodes, up to 2000 steps each, by analogy with
# the run(num_eps, max_steps, ...) calls elsewhere in this file).
experiment.run(150, 2000, callbacks=cbk_lst)
def test_init(self):
    """LiveExperiment should construct cleanly even with an empty agent list."""
    environment = gym.make('MountainCar-v0')
    policy = vcf.DiscreteRandomControl(environment.action_space.n)
    # Construction alone is the assertion: no exception means success.
    vcf.LiveExperiment(environment, policy, [])