def test_TRPO_save(tmpdir):
    agent_path = tmpdir / 'agent_{}'.format(datetime.now().strftime("%H%M%S%f"))

    params = dict(ent_coeff=0.0, max_kl=.001, lam=.98,
                  n_epochs_line_search=10, n_epochs_cg=10,
                  cg_damping=1e-2, cg_residual_tol=1e-10, quiet=True)

    agent_save = learn(TRPO, params)

    agent_save.save(agent_path)
    agent_load = Agent.load(agent_path)

    for att, method in vars(agent_save).items():
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)

        tu.assert_eq(save_attr, load_attr)
def test_sac_save():
    agent_path = './agentdir{}/'.format(datetime.now().strftime("%H%M%S%f"))

    agent_save = learn_sac()

    agent_save.save(agent_path)
    agent_load = Agent.load(agent_path)

    shutil.rmtree(agent_path)

    for att, method in agent_save.__dict__.items():
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)

        # print('{}: {}'.format(att, type(save_attr)))
        tu.assert_eq(save_attr, load_attr)
def test_fqi_boosted_save():
    params = dict(n_iterations=10, boosted=True)
    agent_save, _ = learn(FQI, params)

    agent_path = './agentdir{}/'.format(datetime.now().strftime("%H%M%S%f"))

    agent_save.save(agent_path)
    agent_load = Agent.load(agent_path)

    shutil.rmtree(agent_path)

    for att, method in agent_save.__dict__.items():
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)

        # print('{}: {}'.format(att, type(save_attr)))
        tu.assert_eq(save_attr, load_attr)
def test_eNAC_save():
    params = dict(learning_rate=AdaptiveParameter(value=.01))

    agent_path = './agentdir{}/'.format(datetime.now().strftime("%H%M%S%f"))

    agent_save = learn(eNAC, params)

    agent_save.save(agent_path)
    agent_load = Agent.load(agent_path)

    shutil.rmtree(agent_path)

    for att, method in agent_save.__dict__.items():
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)

        tu.assert_eq(save_attr, load_attr)
def test_PPO_save(tmpdir):
    agent_path = tmpdir / 'agent_{}'.format(datetime.now().strftime("%H%M%S%f"))

    params = dict(actor_optimizer={'class': optim.Adam,
                                   'params': {'lr': 3e-4}},
                  n_epochs_policy=4, batch_size=64, eps_ppo=.2, lam=.95,
                  quiet=True)

    agent_save = learn(PPO, params)

    agent_save.save(agent_path)
    agent_load = Agent.load(agent_path)

    for att, method in vars(agent_save).items():
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)

        tu.assert_eq(save_attr, load_attr)
def test_categorical_dqn_save(tmpdir):
    agent_path = tmpdir / 'agent_{}'.format(
        datetime.now().strftime("%H%M%S%f"))

    params = dict(batch_size=50, initial_replay_size=50,
                  max_replay_size=5000, target_update_frequency=50)

    agent_save = learn(CategoricalDQN, params)

    agent_save.save(agent_path, full_save=True)
    agent_load = Agent.load(agent_path)

    for att, method in vars(agent_save).items():
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)

        tu.assert_eq(save_attr, load_attr)
def test_q_learning_save(tmpdir):
    agent_path = tmpdir / 'agent_{}'.format(datetime.now().strftime("%H%M%S%f"))

    pi, mdp, _ = initialize()
    agent_save = QLearning(mdp.info, pi, Parameter(.5))

    core = Core(agent_save, mdp)

    # Train
    core.learn(n_steps=100, n_steps_per_fit=1, quiet=True)

    agent_save.save(agent_path)
    agent_load = Agent.load(agent_path)

    for att, method in vars(agent_save).items():
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)

        tu.assert_eq(save_attr, load_attr)
def test_prioritized_dqn_save(tmpdir):
    agent_path = tmpdir / 'agent_{}'.format(datetime.now().strftime("%H%M%S%f"))

    replay_memory = PrioritizedReplayMemory(
        50, 500, alpha=.6,
        beta=LinearParameter(.4, threshold_value=1, n=500 // 5)
    )

    params = dict(batch_size=50, initial_replay_size=50,
                  max_replay_size=500, target_update_frequency=50,
                  replay_memory=replay_memory)

    agent_save = learn(DQN, params)

    agent_save.save(agent_path, full_save=True)
    agent_load = Agent.load(agent_path)

    for att, method in vars(agent_save).items():
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)

        tu.assert_eq(save_attr, load_attr)
def test_sarsa_lambda_continuous_linear_save():
    pi, _, mdp_continuous = initialize()
    mdp_continuous.seed(1)

    n_tilings = 1
    tilings = Tiles.generate(n_tilings, [2, 2],
                             mdp_continuous.info.observation_space.low,
                             mdp_continuous.info.observation_space.high)
    features = Features(tilings=tilings)

    approximator_params = dict(
        input_shape=(features.size,),
        output_shape=(mdp_continuous.info.action_space.n,),
        n_actions=mdp_continuous.info.action_space.n
    )
    agent_save = SARSALambdaContinuous(mdp_continuous.info, pi,
                                       LinearApproximator, Parameter(.1), .9,
                                       features=features,
                                       approximator_params=approximator_params)

    core = Core(agent_save, mdp_continuous)

    # Train
    core.learn(n_steps=100, n_steps_per_fit=1, quiet=True)

    agent_path = './agentdir{}/'.format(datetime.now().strftime("%H%M%S%f"))

    agent_save.save(agent_path)
    agent_load = Agent.load(agent_path)

    shutil.rmtree(agent_path)

    for att, method in agent_save.__dict__.items():
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)

        # print('{}: {}'.format(att, type(save_attr)))
        tu.assert_eq(save_attr, load_attr)
def test_true_online_sarsa_lambda_save(tmpdir):
    agent_path = tmpdir / 'agent_{}'.format(
        datetime.now().strftime("%H%M%S%f"))

    pi, _, mdp_continuous = initialize()
    mdp_continuous.seed(1)

    n_tilings = 1
    tilings = Tiles.generate(n_tilings, [2, 2],
                             mdp_continuous.info.observation_space.low,
                             mdp_continuous.info.observation_space.high)
    features = Features(tilings=tilings)

    approximator_params = dict(
        input_shape=(features.size,),
        output_shape=(mdp_continuous.info.action_space.n,),
        n_actions=mdp_continuous.info.action_space.n
    )
    agent_save = TrueOnlineSARSALambda(mdp_continuous.info, pi,
                                       Parameter(.1), .9, features=features,
                                       approximator_params=approximator_params)

    core = Core(agent_save, mdp_continuous)

    # Train
    core.learn(n_steps=100, n_steps_per_fit=1, quiet=True)

    agent_save.save(agent_path)
    agent_load = Agent.load(agent_path)

    for att, method in vars(agent_save).items():
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)

        tu.assert_eq(save_attr, load_attr)
def test_sarsa_lambda_continuous_nn_save(tmpdir):
    agent_path = tmpdir / 'agent_{}'.format(
        datetime.now().strftime("%H%M%S%f"))

    pi, _, mdp_continuous = initialize()
    mdp_continuous.seed(1)

    features = Features(
        n_outputs=mdp_continuous.info.observation_space.shape[0])

    approximator_params = dict(
        input_shape=(features.size,),
        output_shape=(mdp_continuous.info.action_space.n,),
        network=Network,
        n_actions=mdp_continuous.info.action_space.n
    )
    agent_save = SARSALambdaContinuous(mdp_continuous.info, pi,
                                       TorchApproximator, Parameter(.1), .9,
                                       features=features,
                                       approximator_params=approximator_params)

    core = Core(agent_save, mdp_continuous)

    # Train
    core.learn(n_steps=100, n_steps_per_fit=1, quiet=True)

    agent_save.save(agent_path)
    agent_load = Agent.load(agent_path)

    for att, method in vars(agent_save).items():
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)

        tu.assert_eq(save_attr, load_attr)
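# Every test above repeats the same save/load/compare cycle. Below is a
# minimal sketch of a shared helper that could factor out that pattern. The
# helper name `assert_save_load_roundtrip` is hypothetical (an assumption,
# not part of the library); `Agent.save`, `Agent.load`, and `tu.assert_eq`
# are the calls the tests already use.

def assert_save_load_roundtrip(agent_save, agent_path, full_save=False):
    # Hypothetical helper: serialize the agent, reload it, and check that
    # every instance attribute survives the round trip.
    agent_save.save(agent_path, full_save=full_save)
    agent_load = Agent.load(agent_path)

    for att in vars(agent_save):
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)

        tu.assert_eq(save_attr, load_attr)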
def _load_agent(self, path):
    return Agent.load(path)
    potential_violation_times, env.resource_edges
], dtype=object)

mdp = StrictResourceTargetTopEnvironment("dataset.db", params['area'],
                                         observation, gamma=params['gamma'],
                                         allow_wait=True,
                                         start_hour=params['start_hour'],
                                         end_hour=params['end_hour'],
                                         add_time=params['gamma'] < 1,
                                         speed=speed)

# days = [i for i in range(1, 356) if i % 13 == 1]  # validation
days = [i for i in range(1, 356) if i % 13 == 0]  # test

agent = Agent(mdp.info, Greedy(mdp, speed))
core = Core(agent, mdp)
value = np.mean(compute_J(core.evaluate(initial_states=days, render=True)))
print("avg J Greedy", value)
# mdp.save_rendered("greedy.mp4", 10000)

t, h = .1, 600.0
agent = Agent(mdp.info, ACO(mdp, sys.maxsize, t, speed, max_time=h))
core = Core(agent, mdp)
value = np.mean(compute_J(core.evaluate(initial_states=days, render=True)))
print("avg J ACO", t, h, value)
# mdp.save_rendered("aco_%d_%d.mp4" % (t * 60, h))

mdp.close()