Example #1
                                         set_s[i].actor.layerNmu.bias))
            noise = torch.FloatTensor(noise)
            noise = torch.mul(set_s[i].actor.layerNmu.bias.data, noise)
            set_s[i].actor.layerNmu.bias.data = copy.deepcopy(
                set_s[i].actor.layerNmu.bias.data + noise)

        return set_s


if __name__ == "__main__":
    parse_arguments()
    args = parser.parse_args()
    args.env_name = "Springmass-v0"
    print("Running environment" + str(args.env_name))

    env = NormalizedActions(gym.make(args.env_name))
    # env = wrappers.Monitor(env, '/tmp/{}-experiment'.format(args.env_name), force=True)
    env.seed(args.seed)

    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    '''
    DEFINE THE ACTOR RL AGENT
    '''
    if args.algo == "NAF":
        agent = NAF(args.gamma, args.tau, args.hidden_size,
                    env.observation_space.shape[0], env.action_space)
        print("Initialized NAF")
    else:
        agent = DDPG(args.gamma, args.tau, args.hidden_size,
                     env.observation_space.shape[0], env.action_space)
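
The NormalizedActions wrapper used throughout these examples is imported from the surrounding projects and is not shown on this page. As a point of reference, here is a minimal sketch of the common gym.ActionWrapper implementation of such a wrapper; the exact rescaling each project uses is an assumption, not taken from the snippets themselves.

import gym
import numpy as np

class NormalizedActions(gym.ActionWrapper):
    # Rescale agent actions from [-1, 1] to the environment's native bounds.
    def action(self, action):
        low, high = self.action_space.low, self.action_space.high
        action = low + (action + 1.0) * 0.5 * (high - low)
        return np.clip(action, low, high)

    def reverse_action(self, action):
        # Inverse map: an env-space action back to [-1, 1].
        low, high = self.action_space.low, self.action_space.high
        action = 2.0 * (action - low) / (high - low) - 1.0
        return np.clip(action, -1.0, 1.0)
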
Example #2
                    help='number of episodes (default: 128)')
parser.add_argument('--updates_per_step',
                    type=int,
                    default=5,
                    metavar='N',
                    help='model updates per simulator step (default: 5)')
parser.add_argument('--num-stack',
                    type=int,
                    default=1,
                    help='number of frames to stack')
parser.add_argument('--model-suffix',
                    default="",
                    help='suffix appended to saved/loaded model files')
args = parser.parse_args()

env = NormalizedActions(gym.make(args.env_name))

writer = SummaryWriter()

env.seed(args.seed)
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    torch.cuda.manual_seed(args.seed)
else:
    device = torch.device("cpu")
    torch.manual_seed(args.seed)

np.random.seed(args.seed)

obs_shape = env.observation_space.shape
obs_shape = (obs_shape[0] * args.num_stack, *obs_shape[1:])
Example #3
                    help='random seed (default: 4)')
parser.add_argument('--batch_size', type=int, default=128, metavar='N',
                    help='batch size (default: 128)')
parser.add_argument('--num_steps', type=int, default=1000, metavar='N',
                    help='max episode length (default: 1000)')
parser.add_argument('--num_episodes', type=int, default=1000, metavar='N',
                    help='number of episodes (default: 1000)')
parser.add_argument('--hidden_size', type=int, default=128, metavar='N',
                    help='hidden layer size (default: 128)')
parser.add_argument('--updates_per_step', type=int, default=5, metavar='N',
                    help='model updates per simulator step (default: 5)')
parser.add_argument('--replay_size', type=int, default=1000000, metavar='N',
                    help='size of replay buffer (default: 1000000)')
args = parser.parse_args()

env = NormalizedActions(gym.make(args.env_name))

writer = SummaryWriter()

env.seed(args.seed)
torch.manual_seed(args.seed)
np.random.seed(args.seed)
if args.algo == "NAF":
    agent = NAF(args.gamma, args.tau, args.hidden_size,
                      env.observation_space.shape[0], env.action_space)
else:
    agent = DDPG(args.gamma, args.tau, args.hidden_size,
                      env.observation_space.shape[0], env.action_space)

memory = ReplayMemory(args.replay_size)
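
ReplayMemory above comes from the surrounding project and is not defined on this page. Below is a minimal sketch of the uniform-sampling transition buffer such a class usually implements; the field names and method signatures are assumptions.

import random
from collections import namedtuple

Transition = namedtuple('Transition',
                        ('state', 'action', 'reward', 'next_state', 'done'))

class ReplayMemory:
    # Fixed-capacity ring buffer; transitions are sampled uniformly at random.
    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
        self.position = 0

    def push(self, *args):
        if len(self.memory) < self.capacity:
            self.memory.append(None)
        self.memory[self.position] = Transition(*args)
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)
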
Example #4
if args.gpu >= 0:
    print("gpu ok")
    ptu.set_gpu_mode(True, args.gpu)
# set env
if args.env_name == 'Humanoidrllab':
    from rllab.envs.mujoco.humanoid_env import HumanoidEnv
    from rllab.envs.normalized_env import normalize
    env = normalize(HumanoidEnv())
    max_episode_steps = float('inf')
    if args.seed >= 0:
        global seed_
        seed_ = args.seed
else:
    env = gym.make(args.env_name)
    max_episode_steps = env._max_episode_steps
    env = NormalizedActions(env)
    if args.seed >= 0:
        env.seed(args.seed)
if args.seed >= 0:
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)
    torch.random.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

# set args
args.num_actions = env.action_space.shape[0]
args.max_action = env.action_space.high
Example #5
parser.add_argument('--batch_size', type=int, default=256, metavar='N',
                    help='batch size (default: 256)')
parser.add_argument('--num_steps', type=int, default=1000001, metavar='N',
                    help='maximum number of steps (default: 1000000)')
parser.add_argument('--hidden_size', type=int, default=256, metavar='N',
                    help='hidden size (default: 256)')
parser.add_argument('--updates_per_step', type=int, default=1, metavar='N',
                    help='model updates per simulator step (default: 1)')
parser.add_argument('--target_update_interval', type=int, default=1, metavar='N',
                    help='Value target update per no. of updates per step (default: 1)')
parser.add_argument('--replay_size', type=int, default=1000000, metavar='N',
                    help='size of replay buffer (default: 1000000)')
args = parser.parse_args()

# Environment
env = NormalizedActions(gym.make(args.env_name))
env.seed(args.seed)
torch.manual_seed(args.seed)
np.random.seed(args.seed)

# Agent
agent = SAC(env.observation_space.shape[0], env.action_space, args)

writer = SummaryWriter()

# Memory
memory = ReplayMemory(args.replay_size)

# Training Loop
rewards = []
total_numsteps = 0
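
The snippet stops just before the training loop itself. For context, here is a hedged sketch of the off-policy loop this kind of setup typically feeds into; agent.select_action and agent.update_parameters are assumed method names and are not taken from the code above.

# Hypothetical continuation (not part of the original snippet).
while total_numsteps < args.num_steps:
    state = env.reset()
    episode_reward = 0.0
    done = False
    while not done:
        action = agent.select_action(state)             # assumed agent API
        next_state, reward, done, _ = env.step(action)  # old gym 4-tuple step
        memory.push(state, action, reward, next_state, done)
        total_numsteps += 1
        episode_reward += reward
        state = next_state

        if len(memory) > args.batch_size:
            for _ in range(args.updates_per_step):
                agent.update_parameters(memory, args.batch_size)  # assumed agent API

    rewards.append(episode_reward)
    writer.add_scalar('reward/train', episode_reward, total_numsteps)
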
Example #6
    type=str,
    default='min',
    choices=['min', 'max', 'mean'],
    help=
    'The Q value for each sample is determined based on this operator over the two Q networks.'
)
parser.add_argument('--temp',
                    type=float,
                    default=1.0,
                    help='Boltzmann temperature for normalizing actions')

args = parser.parse_args()

assert args.num_outputs > 0

env = NormalizedActions(gym.make(args.env_name))
eval_env = NormalizedActions(gym.make(args.env_name))

if args.policy_type == 'generative':
    agent = Generative(gamma=args.gamma,
                       tau=args.tau,
                       num_inputs=env.observation_space.shape[0],
                       action_space=env.action_space,
                       replay_size=args.replay_size,
                       num_outputs=args.num_outputs,
                       q_normalization=args.q_normalization,
                       target_policy=args.target_policy,
                       target_policy_q=args.target_policy_q,
                       normalize_obs=args.normalize_obs,
                       normalize_returns=args.normalize_rew,
                       autoregressive=not args.not_autoregressive,
Example #7
parser.add_argument('--iter', type=int, default=10, metavar='N', help='number of iterations of solving x constraints')
# Save & render 
parser.add_argument('--render', action='store_true', help='render the environment')
parser.add_argument('--ckpt_freq', type=int, default=2, help='model saving frequency')
parser.add_argument('--display', type=bool, default=False, help='display or not')
args = parser.parse_args()


'''
Initialize environment
'''
env_name = args.env_name
env = gym.make(env_name)
if type(env.action_space) != gym.spaces.discrete.Discrete:
    from LPO_continuous import LPO
    env = NormalizedActions(gym.make(env_name))
else:
    from LPO_discrete import LPO

if args.display:
    env = wrappers.Monitor(env, '/tmp/{}-experiment'.format(env_name), force=True)

env.seed(args.seed)
torch.manual_seed(args.seed)
np.random.seed(args.seed)

ckpt = 'ckpt_' + env_name
if not os.path.exists(ckpt):    
    os.mkdir(ckpt)

Example #8
            if random.random() < alpha:
                action = noise.sample(action.shape).view(action.shape)

            state, reward, done, _ = _env.step(action.cpu().numpy()[0])
            total_reward += reward

            state = agent.Tensor([state])
            if done:
                break
    return total_reward


test_episodes = 100
for env_name in [args.env]:#os.listdir(base_dir):
    
    env = NormalizedActions(gym.make(env_name))

    agent = DDPG(beta=0.9, epsilon=0, learning_rate=1e-4, gamma=0.99, tau=0.01,
                 hidden_size_dim0=args.hidden_size, hidden_size_dim1=args.hidden_size,
                 num_inputs=env.observation_space.shape[0], action_space=env.action_space,
                 train_mode=False, alpha=0, replay_size=0, optimizer=0,
                 two_player=args.two_player, normalize_obs=True)
    noise = uniform.Uniform(agent.Tensor([-1.0]), agent.Tensor([1.0]))

    basic_bm = copy.deepcopy(env.env.env.model.body_mass.copy())

    env_dir = base_dir + env_name + '/'
    for optimizer in [args.optimizer]: #['RMSprop', 'SGLD_thermal_0.01', 'SGLD_thermal_0.001', 'SGLD_thermal_0.0001', 'SGLD_thermal_1e-05']:
        for noise_type in [args.action_noise]: 
            noise_dir = env_dir + optimizer + '/' + noise_type + '/nr_mdp_' + str(args.alpha) + '_1/'	
            if os.path.exists(noise_dir):
                for subdir in sorted(os.listdir(noise_dir)):
                    results = {}
                    
                    run_number = 0
Example #9
    s = env.reset()
    rew = 0.
    for t in range(T):
        a = policy.get_action(s)
        s, r, done, _ = env.step(a)
        rew += r
        if done:
            break
    return rew


if __name__ == '__main__':

    env_name = args.env
    try:
        env = NormalizedActions(envs.env_list[env_name](render=args.render))
    except TypeError as err:
        print('no render argument, assuming env.render will just work')
        env = NormalizedActions(envs.env_list[env_name]())
    assert np.all(np.abs(env.action_space.low) <= 1.) and np.all(
        np.abs(env.action_space.high) <= 1.), 'Action space not normalized'
    env.reset()

    env.seed(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.manual_seed(args.seed)

    now = datetime.now()
Example #10
parser.set_defaults(done_util=True)

parser.add_argument('--render', dest='render', action='store_true')
parser.add_argument('--no_render', dest='render', action='store_false')
parser.set_defaults(render=False)

parser.add_argument('--record', dest='record', action='store_true')
parser.add_argument('--no-record', dest='record', action='store_false')
parser.set_defaults(record=False)
args = parser.parse_args()

if __name__ == '__main__':

    env_name = args.env
    try:
        env = NormalizedActions(envs.env_list[env_name](render=args.render))
    except TypeError as err:
        print('no render argument, assuming env.render will just work')
        env = NormalizedActions(envs.env_list[env_name]())

    assert np.all(np.abs(env.action_space.low) <= 1.) and np.all(
        np.abs(env.action_space.high) <= 1.), 'Action space not normalized'

    if args.record:
        env = gym.wrappers.Monitor(env,
                                   './data/vid/mpc/{}-{}'.format(
                                       env_name, args.frame),
                                   force=True)
    env.reset()

    env.seed(args.seed)
Example #11
    print("=== HYPERPARAMETERS ===")
    for key in hp:
        print(f"{key} : {hp[key]}")
    print("=======================")
    logger.debug("Initial setup completed.")
    # Create JSON of Hyper-Parameters for reproducibility
    with open("./runs/" + folder + "hp.json", 'w') as outfile:
        json.dump(vars(args), outfile)
    cnn = args.pics
    for i_run in range(args.max_num_run):
        logger.important(f"START TRAINING RUN {i_run}")

        # Make the environment
        env = gym.make(args.env_name)
        env._max_episode_steps = args.max_num_step
        env = NormalizedActions(env)
        if cnn:
            env = ImageWrapper(args.img_size, env)

        # Set Seed for repeatability
        torch.manual_seed(args.seed + i_run)
        np.random.seed(args.seed + i_run)
        env.seed(args.seed + i_run)
        env.action_space.np_random.seed(args.seed + i_run)

        # Setup the agent
        agent = SAC(args.state_buffer_size, env.action_space, args)

        # Setup TensorboardX
        writer_train = SummaryWriter(log_dir='runs/' + folder + 'run_' +
                                     str(i_run) + '/train')
Example #12
parser.add_argument('--num_steps', type=int, default=1000, metavar='N', help='max episode length (default: 1000)')
parser.add_argument('--num_rollouts', type=int, default=2000, metavar='N', help='number of rollouts (default: 2000)')
parser.add_argument('--hidden_size', type=int, default=15, metavar='N', help='number of hidden neurons (default: 15)')
parser.add_argument('--constraint_size', type=int, default=10, metavar='N', help='number of constraints to solve each time')
parser.add_argument('--layers', type=int, default=2, metavar='N', help='number of layers in the policy NN')
# Save & render 
parser.add_argument('--render', action='store_true', help='render the environment')
parser.add_argument('--ckpt_freq', type=int, default=200, help='model saving frequency')
parser.add_argument('--display', type=bool, default=False, help='display or not')
args = parser.parse_args()

env_name = args.env_name
env = gym.make(env_name)
if type(env.action_space) != gym.spaces.discrete.Discrete:
    from LPO_continuous import LPO
    env = NormalizedActions(gym.make(env_name))
else:
    from LPO_discrete import LPO

if args.display:
    env = wrappers.Monitor(env, '/tmp/{}-experiment'.format(env_name), force=True)

env.seed(args.seed)
torch.manual_seed(args.seed)
np.random.seed(args.seed)

if args.layers == 1:
    policy = SingleLayerPolicy(args.hidden_size, env.observation_space.shape[0], env.action_space)
elif args.layers == 2:
    policy = TwoLayerPolicy(args.hidden_size, env.observation_space.shape[0], env.action_space)
agent = LPO(args.hidden_size, env.observation_space.shape[0], env.action_space, args.constraint_size, policy)
Example #13
parser.add_argument('--warmup',
                    type=int,
                    default=10000,
                    help='Number of insertions before updates')

args = parser.parse_args()

try:
    os.makedirs(args.log_dir)
except OSError:
    files = glob.glob(os.path.join(args.log_dir, '*.monitor.csv'))
    for f in files:
        os.remove(f)

if not args.discrete:
    env = NormalizedActions(gym.make(args.env_name))
else:
    env = [
        make_env(args.env_name, args.seed, i, args.log_dir, False)
        for i in range(args.num_processes)
    ]
    env = SubprocVecEnv(env)
#writer = SummaryWriter()

if args.vis:
    from visdom import Visdom
    viz = Visdom(port=8097, server='http://eos11')
    win = None

#env.seed(args.seed)
torch.manual_seed(args.seed)
Example #14
if __name__ == '__main__':
    env = sys.argv[1]
    args = None

    if env == 'mc':
        args = args_mc
    elif env == 'pd':
        args = args_pd
    elif env == 'll':
        args = args_ll
    else:
        print('Environment not selected. Please choose from: mc, pd, ll')
        exit(-1)

    env = NormalizedActions(gym.make(args['env_name']))

    env.seed(args['seed'])
    torch.manual_seed(args['seed'])
    np.random.seed(args['seed'])

    agent = NAF(args['gamma'], args['tau'], args['hidden_size'],
                env.observation_space.shape[0], env.action_space)
    agent.load_model(f'models/naf_{args["env_name"]}')

    replay_buffer = ReplayBuffer(args['replay_size'])

    ounoise = OUNoise(env.action_space.shape[0]) if args['ou_noise'] else None

    run()
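
OUNoise here is the project's Ornstein-Uhlenbeck exploration noise and is not shown on this page. A minimal sketch of the standard process follows; the parameter values are the usual defaults and are assumed rather than taken from this code.

import numpy as np

class OUNoise:
    # Temporally correlated exploration noise from an Ornstein-Uhlenbeck process.
    def __init__(self, action_dim, mu=0.0, theta=0.15, sigma=0.2):
        self.mu = mu * np.ones(action_dim)
        self.theta = theta
        self.sigma = sigma
        self.state = self.mu.copy()

    def reset(self):
        self.state = self.mu.copy()

    def noise(self):
        dx = self.theta * (self.mu - self.state) + self.sigma * np.random.randn(len(self.state))
        self.state = self.state + dx
        return self.state
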
Example #15
import numpy as np

from normalized_actions import NormalizedActions

if __name__ == '__main__':

    args = get_args()

    # initialize environment
    env_name = args.env_name
    env = gym.make(env_name)

    # choose agent according to action space
    if type(env.action_space) != gym.spaces.discrete.Discrete:
        from reinforce_continuous import REINFORCE
        env = NormalizedActions(gym.make(env_name))
    else:
        from reinforce_discrete import REINFORCE

    if args.display:
        env = wrappers.Monitor(env,
                               '/tmp/{}-experiment'.format(env_name),
                               force=True)

    env.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    agent = REINFORCE(args.hidden_size, env.observation_space.shape[0],
                      env.action_space)
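
The snippet ends right after the agent is constructed. A hedged sketch of the episodic REINFORCE loop this setup typically continues into is shown below; select_action and update_parameters are assumed method names, and args.num_episodes, args.num_steps and args.gamma are assumed to be defined by the truncated argument parser.

    # Hypothetical continuation (not part of the original snippet).
    for i_episode in range(args.num_episodes):
        state = torch.Tensor([env.reset()])
        log_probs, rewards, entropies = [], [], []
        for t in range(args.num_steps):
            action, log_prob, entropy = agent.select_action(state)  # assumed API
            next_state, reward, done, _ = env.step(action.numpy()[0])
            log_probs.append(log_prob)
            rewards.append(reward)
            entropies.append(entropy)
            state = torch.Tensor([next_state])
            if done:
                break
        # Monte-Carlo policy-gradient update at the end of the episode.
        agent.update_parameters(rewards, log_probs, entropies, args.gamma)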