def __init__(self,
             hp=None,
             input_size=None,
             output_size=None,
             policy=None,
             normalizer=None,
             monitor_dir=None):
    self.hp = hp or HyperParams()  # hyperparameters to use
    np.random.seed(self.hp.seed)  # initialize the random seed
    self.env = pybullet_envs.make(self.hp.env_name)  # create the environment
    self.record_video = False
    # Wrap the environment in a Monitor to record how the agent plays
    if monitor_dir is not None:
        should_record = lambda i: self.record_video
        self.env = wrappers.Monitor(self.env,
                                    monitor_dir,
                                    video_callable=should_record,
                                    force=True)
    self.in_size = input_size or self.env.observation_space.shape[0]  # input size
    self.out_size = output_size or self.env.action_space.shape[0]  # output size
    self.policy = policy or Policy(self.in_size, self.out_size, self.hp)  # policy to use
    self.normalizer = normalizer or Normalizer(self.in_size)  # normalizer to use
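The HyperParams, Policy, and Normalizer classes referenced in this constructor are not part of the excerpt. As a rough sketch only (field names, defaults, and method names below are assumptions, not the original code), an ARS-style hyperparameter container and running-statistics normalizer typically look like this:

import numpy as np

class HyperParams:
    # Hypothetical hyperparameter container; field names are assumed.
    def __init__(self, env_name='HalfCheetahBulletEnv-v0', seed=1,
                 learning_rate=0.02, num_deltas=16, num_best_deltas=16,
                 noise=0.03, episode_length=1000):
        self.env_name = env_name
        self.seed = seed
        self.learning_rate = learning_rate
        self.num_deltas = num_deltas
        self.num_best_deltas = num_best_deltas
        self.noise = noise
        self.episode_length = episode_length

class Normalizer:
    # Keeps a running mean/variance of observations (Welford-style updates).
    def __init__(self, num_inputs):
        self.n = np.zeros(num_inputs)
        self.mean = np.zeros(num_inputs)
        self.mean_diff = np.zeros(num_inputs)
        self.var = np.zeros(num_inputs)

    def observe(self, x):
        # Update the running statistics with one observation
        self.n += 1.0
        last_mean = self.mean.copy()
        self.mean += (x - self.mean) / self.n
        self.mean_diff += (x - last_mean) * (x - self.mean)
        self.var = (self.mean_diff / self.n).clip(min=1e-2)

    def normalize(self, inputs):
        return (inputs - self.mean) / np.sqrt(self.var)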
Example No. 2
def train_eval_reacher_dqn(num_train: int,
                           num_test: int,
                           device: str,
                           config: Optional[Dict] = None) -> Tuple[Agent, float, float]:
    assert device in ('cpu', 'cuda'), "The device must be either cpu or cuda"

    env = pe.make('ReacherBulletEnv-v0')
    print('Training the agent...')
    agent = te.train_dqn_agent(env, num_train, config, device, show=True)
    print('\nTraining finished. Evaluating performance.')

    mean_score, success_rate = te.evaluate_model(agent, num_test, show=True)

    return agent, mean_score, success_rate
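For illustration, the helper above could be invoked as follows; the aliases pe and te are assumed to refer to pybullet_envs and the project's training utilities, and the argument values here are arbitrary:

# Hypothetical invocation; numbers chosen purely for illustration.
if __name__ == '__main__':
    agent, mean_score, success_rate = train_eval_reacher_dqn(
        num_train=500,   # training episodes
        num_test=50,     # evaluation episodes
        device='cpu',    # or 'cuda' if a GPU is available
        config=None)     # fall back to the default DQN configuration
    print(f'Mean score: {mean_score:.2f}, success rate: {success_rate:.2%}')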
Example No. 3
import time

import sys
sys.path.insert(0, '../..')

import pybullet_envs  # needed for pybullet_envs.make() below

import libs_agents
from libs_common.Training import *


import models.ddpg_noisy.model.src.model_critic     as ModelCritic
import models.ddpg_noisy.model.src.model_actor      as ModelActor
import models.ddpg_noisy.model.src.config           as Config

path = "models/ddpg_noisy/model/"

env = pybullet_envs.make("AntBulletEnv-v0")
#env.render()

agent = libs_agents.AgentDDPG(env, ModelCritic, ModelActor, Config)

max_iterations = 4*(10**6)
training = TrainingIterations(env, agent, max_iterations, path, 10000)
training.run()

'''
agent.load(path)
agent.disable_training()
while True:
    reward, done = agent.main()
    env.render()
    time.sleep(0.01)
'''
Example No. 4
    'discard_time_limit': True,

    'alpha': 0.1,
    'actor_update_interval': 2,
    'critic_target_update_interval': 2,
    'warmup_step': 1000,
    'q_regularisation_k': 2,

    'max_env_step': 200000,
    'testing_gap': 10000,
    'testing_episodes': 10,
    'saving_gap': 100000,
}

seeds = [11]
seed_returns = []
path = os.path.dirname(os.path.realpath(__file__))
for seed in seeds:

    env = pybullet_envs.make("Walker2DBulletEnv-v0")
    # call render() before training to visualize (pybullet-gym-specific)
    # env.render()
    seed_path = path + '/seed' + str(seed)

    agent = SACDrQ(algo_params=drq_params, env=env, path=seed_path, seed=seed)
    agent.run(test=False)
    # the sleep argument pauses the rendering briefly at every env step, useful for slowing down visualization
    # agent.run(test=True, load_network_ep=50, sleep=0.05)
    seed_returns.append(agent.statistic_dict['episode_return'])
    del env, agent
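The loop above collects each seed's episode returns in seed_returns, but the excerpt ends before they are used. A minimal way to summarise them across seeds (assuming numpy is available and every seed produced the same number of episodes; the file name below is made up) could be:

import numpy as np

returns = np.array(seed_returns)    # shape: (num_seeds, num_episodes)
mean_return = returns.mean(axis=0)  # per-episode mean over seeds
std_return = returns.std(axis=0)    # per-episode spread over seeds
np.save(os.path.join(path, 'mean_episode_return.npy'), mean_return)
print('Final mean return over seeds: %.2f +/- %.2f' % (mean_return[-1], std_return[-1]))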
Example No. 5
import time

import sys

sys.path.insert(0, '../..')

import pybullet_envs  # needed for pybullet_envs.make() below

import libs_agents
from libs_common.Training import *

import models.ddpg_noisy.model.src.model_critic as ModelCritic
import models.ddpg_noisy.model.src.model_actor as ModelActor
import models.ddpg_noisy.model.src.config as Config

path = "models/ddpg_noisy/model/"

env = pybullet_envs.make("HalfCheetahBulletEnv-v0")
#env.render()

agent = libs_agents.AgentDDPG(env, ModelCritic, ModelActor, Config)

max_iterations = 4 * (10**6)
training = TrainingIterations(env, agent, max_iterations, path, 10000)
training.run()
'''
agent.load(path)
agent.disable_training()
while True:
    reward, done = agent.main()
    env.render()
    time.sleep(0.01)
'''
Example No. 6
    'Q_weight_decay': 0.0,
    'update_interval': 1,
    'batch_size': 100,
    'optimization_steps': 1,
    'tau': 0.005,
    'discount_factor': 0.99,
    'discard_time_limit': True,
    'warmup_step': 2500,
    'observation_normalization': False,
    'training_episodes': 101,
    'testing_gap': 10,
    'testing_episodes': 10,
    'saving_gap': 50,
}
seeds = [11, 22, 33, 44, 55, 66]
seed_returns = []
path = os.path.dirname(os.path.realpath(__file__))
for seed in seeds:

    env = pybullet_envs.make("InvertedPendulumSwingupBulletEnv-v0")
    # call render() before training to visualize (pybullet-gym-specific)
    # env.render()
    seed_path = path + '/seed' + str(seed)

    agent = DDPG(algo_params=ddpg_params, env=env, path=seed_path, seed=seed)
    agent.run(test=False)
    # the sleep argument pauses the rendering briefly at every env step, useful for slowing down visualization
    # agent.run(test=True, load_network_ep=50, sleep=0.05)
    seed_returns.append(agent.statistic_dict['episode_return'])
    del env, agent