def __init__(self, hp=None, input_size=None, output_size=None, policy=None,
             normalizer=None, monitor_dir=None):
    self.hp = hp or HyperParams()                    # hyperparameters to use
    np.random.seed(self.hp.seed)                     # initialize the random seed
    self.env = pybullet_envs.make(self.hp.env_name)  # create the environment
    self.record_video = False
    # Set up a Monitor wrapper to observe how the agent plays
    if monitor_dir is not None:
        should_record = lambda i: self.record_video
        self.env = wrappers.Monitor(self.env, monitor_dir,
                                    video_callable=should_record, force=True)
    self.in_size = input_size or self.env.observation_space.shape[0]      # input size
    self.out_size = output_size or self.env.action_space.shape[0]         # output size
    self.policy = policy or Policy(self.in_size, self.out_size, self.hp)  # policy to be used
    self.normalizer = normalizer or Normalizer(self.in_size)              # normalizer to use
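# The constructor above falls back to a default Normalizer when none is supplied.
# Below is a minimal sketch of that kind of online observation normalizer, assuming
# the usual running mean/variance bookkeeping used by ARS-style trainers; the repo's
# actual Normalizer class may differ in detail.
import numpy as np

class RunningNormalizer:
    """Tracks per-dimension running mean and variance of observations."""

    def __init__(self, num_inputs):
        self.n = np.zeros(num_inputs)          # per-dimension update count
        self.mean = np.zeros(num_inputs)       # running mean
        self.mean_diff = np.zeros(num_inputs)  # running sum of squared deviations
        self.var = np.ones(num_inputs)         # running variance (starts at 1 to avoid division by zero)

    def observe(self, x):
        # Welford-style online update of the running statistics
        self.n += 1.0
        last_mean = self.mean.copy()
        self.mean += (x - self.mean) / self.n
        self.mean_diff += (x - last_mean) * (x - self.mean)
        self.var = np.clip(self.mean_diff / self.n, 1e-2, None)

    def normalize(self, x):
        # Standardize an observation with the statistics gathered so far
        return (x - self.mean) / np.sqrt(self.var)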
def train_eval_reacher_dqn(num_train: int, num_test: int, device: str,
                           config: Optional[Dict] = None) -> Tuple[Agent, float, float]:
    assert device in ('cpu', 'cuda'), "The device must be either cpu or cuda"
    env = pe.make('ReacherBulletEnv-v0')

    print('Training the agent...')
    agent = te.train_dqn_agent(env, num_train, config, device, show=True)
    print()

    print('\nTraining finished. Evaluating performance.')
    mean_score, success_rate = te.evaluate_model(agent, num_test, show=True)
    return agent, mean_score, success_rate
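# A hedged usage sketch for the helper above; the episode counts below are
# illustrative values, not defaults prescribed by this repo.
if __name__ == '__main__':
    agent, mean_score, success_rate = train_eval_reacher_dqn(
        num_train=500,   # number of training episodes (illustrative)
        num_test=100,    # number of evaluation episodes (illustrative)
        device='cpu',    # or 'cuda' if a GPU is available
    )
    print(f'Mean score: {mean_score}, success rate: {success_rate}')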
import time
import sys
sys.path.insert(0, '../..')

import pybullet_envs

import libs_agents
from libs_common.Training import *

import models.ddpg_noisy.model.src.model_critic as ModelCritic
import models.ddpg_noisy.model.src.model_actor as ModelActor
import models.ddpg_noisy.model.src.config as Config

path = "models/ddpg_noisy/model/"

env = pybullet_envs.make("AntBulletEnv-v0")
# env.render()

agent = libs_agents.AgentDDPG(env, ModelCritic, ModelActor, Config)

max_iterations = 4 * (10**6)
training = TrainingIterations(env, agent, max_iterations, path, 10000)
training.run()

'''
agent.load(path)
agent.disable_training()

while True:
    reward, done = agent.main()
    env.render()
    time.sleep(0.01)
'''
    'discard_time_limit': True,
    'alpha': 0.1,
    'actor_update_interval': 2,
    'critic_target_update_interval': 2,
    'warmup_step': 1000,
    'q_regularisation_k': 2,
    'max_env_step': 200000,
    'testing_gap': 10000,
    'testing_episodes': 10,
    'saving_gap': 100000,
}

seeds = [11]
seed_returns = []
path = os.path.dirname(os.path.realpath(__file__))

for seed in seeds:
    env = pybullet_envs.make("Walker2DBulletEnv-v0")
    # call render() before training to visualize (pybullet-gym-specific)
    # env.render()
    seed_path = path + '/seed' + str(seed)
    agent = SACDrQ(algo_params=drq_params, env=env, path=seed_path, seed=seed)
    agent.run(test=False)
    # the sleep argument pauses the rendering briefly at every env step, useful for slowing down visualization
    # agent.run(test=True, load_network_ep=50, sleep=0.05)
    seed_returns.append(agent.statistic_dict['episode_return'])
    del env, agent
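# The loop above stores one per-episode return curve for the single seed used here.
# A minimal sketch of persisting those curves for later comparison follows; the
# file name pattern is illustrative, not something this repo writes by itself.
import os
import numpy as np

for s, returns in zip(seeds, seed_returns):
    np.save(os.path.join(path, 'seed%d_episode_return.npy' % s), np.asarray(returns))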
import time
import sys
sys.path.insert(0, '../..')

import pybullet_envs

import libs_agents
from libs_common.Training import *

import models.ddpg_noisy.model.src.model_critic as ModelCritic
import models.ddpg_noisy.model.src.model_actor as ModelActor
import models.ddpg_noisy.model.src.config as Config

path = "models/ddpg_noisy/model/"

env = pybullet_envs.make("HalfCheetahBulletEnv-v0")
# env.render()

agent = libs_agents.AgentDDPG(env, ModelCritic, ModelActor, Config)

max_iterations = 4 * (10**6)
training = TrainingIterations(env, agent, max_iterations, path, 10000)
training.run()

'''
agent.load(path)
agent.disable_training()

while True:
    reward, done = agent.main()
    env.render()
    time.sleep(0.01)
'''
    'Q_weight_decay': 0.0,
    'update_interval': 1,
    'batch_size': 100,
    'optimization_steps': 1,
    'tau': 0.005,
    'discount_factor': 0.99,
    'discard_time_limit': True,
    'warmup_step': 2500,
    'observation_normalization': False,
    'training_episodes': 101,
    'testing_gap': 10,
    'testing_episodes': 10,
    'saving_gap': 50,
}

seeds = [11, 22, 33, 44, 55, 66]
seed_returns = []
path = os.path.dirname(os.path.realpath(__file__))

for seed in seeds:
    env = pybullet_envs.make("InvertedPendulumSwingupBulletEnv-v0")
    # call render() before training to visualize (pybullet-gym-specific)
    # env.render()
    seed_path = path + '/seed' + str(seed)
    agent = DDPG(algo_params=ddpg_params, env=env, path=seed_path, seed=seed)
    agent.run(test=False)
    # the sleep argument pauses the rendering briefly at every env step, useful for slowing down visualization
    # agent.run(test=True, load_network_ep=50, sleep=0.05)
    seed_returns.append(agent.statistic_dict['episode_return'])
    del env, agent
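# seed_returns now holds one return curve per seed. A minimal sketch of collapsing
# them into a mean/std learning curve is shown below; it assumes every entry is a
# 1-D sequence of per-episode returns and truncates them to a common length before
# stacking, which is an assumption rather than guaranteed repo behaviour.
import numpy as np

def summarise_seed_returns(seed_returns):
    # Truncate each curve to the shortest one so they can be stacked into an array
    min_len = min(len(r) for r in seed_returns)
    stacked = np.stack([np.asarray(r)[:min_len] for r in seed_returns])
    return stacked.mean(axis=0), stacked.std(axis=0)

# mean_curve, std_curve = summarise_seed_returns(seed_returns)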