Example #1
                        action='store_true',
                        default=False)

    args = parser.parse_args()

    env, environment_params, environment_wrappers, environment_wrapper_arguments = choose_env(
        ENV_NAME)
    # prefix encodes the environment name plus the number of randomised parameters
    prefix = ENV_NAME + str(len(environment_params["parameters_to_randomise"]))
    model_path = '../../../../../data/epi/model/' + prefix + '_epi'
    if TASK_POLICY_ALG == 'td3':
        # load TD3 hyperparameters
        (action_range, batch_size, explore_steps, update_itr, explore_noise_scale,
         eval_noise_scale, reward_scale, hidden_dim, noise_decay,
         policy_target_update_interval, q_lr, policy_lr, replay_buffer_size,
         DETERMINISTIC) = load_params('td3', [
             'action_range', 'batch_size', 'explore_steps', 'update_itr',
             'explore_noise_scale', 'eval_noise_scale', 'reward_scale', 'hidden_dim',
             'noise_decay', 'policy_target_update_interval', 'q_lr', 'policy_lr',
             'replay_buffer_size', 'deterministic'])

        # load replay buffer when off-policy
        BaseManager.register('ReplayBuffer', ReplayBuffer)
        manager = BaseManager()
        manager.start()
        replay_buffer = manager.ReplayBuffer(
            replay_buffer_size)  # share the replay buffer through manager
    elif TASK_POLICY_ALG == 'ppo':
        [batch_size] = load_params('ppo', ['batch_size'])

    epi = EPI(env, data_path='./data/' + ENV_NAME, model_path=model_path)

    if args.epi:
        epi.embedding_learn(env,
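
Example #1 shares a single replay buffer across training processes through Python's multiprocessing.managers.BaseManager: the buffer class is registered with the manager, the manager starts a server process, and manager.ReplayBuffer(...) returns a proxy that every worker can push to. Below is a minimal, self-contained sketch of that pattern; the toy ReplayBuffer is only illustrative, not the repository's implementation.

import random
from multiprocessing import Process
from multiprocessing.managers import BaseManager


class ReplayBuffer:
    """Toy FIFO buffer; stands in for the repository's ReplayBuffer."""

    def __init__(self, capacity):
        self.capacity = capacity
        self.buffer = []

    def push(self, transition):
        # drop the oldest transition once the buffer is full
        if len(self.buffer) >= self.capacity:
            self.buffer.pop(0)
        self.buffer.append(transition)

    def sample(self, batch_size):
        return random.sample(self.buffer, min(batch_size, len(self.buffer)))

    def size(self):
        return len(self.buffer)


def worker(buffer, worker_id):
    # every worker process pushes into the same proxied buffer
    for step in range(5):
        buffer.push((worker_id, step))


if __name__ == '__main__':
    # register the class, start the manager's server process, create a proxy
    BaseManager.register('ReplayBuffer', ReplayBuffer)
    manager = BaseManager()
    manager.start()
    shared_buffer = manager.ReplayBuffer(1000)

    workers = [Process(target=worker, args=(shared_buffer, i)) for i in range(2)]
    for p in workers:
        p.start()
    for p in workers:
        p.join()

    print(shared_buffer.size())  # 10: transitions from both workers land in one buffer
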
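Both examples on this page call load_params from sim2real_policies.utils.load_params, whose body is not shown here. From the call sites it takes an algorithm name and a list of parameter keys and returns the corresponding values in order. A hypothetical stand-in with placeholder defaults (not the repository's actual values) could look like this:

_DEFAULTS = {
    'ppo': {
        'action_range': 1.0,   # placeholder values, not the repository's settings
        'batch_size': 128,
        'gamma': 0.99,
        'random_seed': 2,
        'actor_update_steps': 10,
        'critic_update_steps': 10,
        'eps': 1e-8,
        'actor_lr': 1e-4,
        'critic_lr': 2e-4,
        'method': {'name': 'clip', 'epsilon': 0.2},
    },
}


def load_params(alg_name, param_names):
    # return the requested hyperparameters for alg_name, in the order asked for
    params = _DEFAULTS[alg_name]
    return [params[name] for name in param_names]


# usage mirroring the examples on this page:
# [BATCH, GAMMA] = load_params('ppo', ['batch_size', 'gamma'])
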
Example #2

from sim2real_policies.utils.policy_networks import PPO_PolicyNetwork  # import path assumed; not shown in this excerpt
from sim2real_policies.utils.value_networks import ValueNetwork  # import path assumed; not shown in this excerpt
from sim2real_policies.utils.envs import make_env
from sim2real_policies.utils.evaluate import evaluate
from sim2real_policies.utils.optimizers import SharedAdam, ShareParameters
from sim2real_policies.utils.load_params import load_params

#####  hyper-parameters for RL training  ############
ENV_NAME = ['SawyerReach', 'SawyerPush', 'SawyerSlide'][0]  # environment name
EP_MAX = 100000  # total number of episodes for training
EP_LEN = 200  # total number of steps for each episode
prefix = ''
MODEL_PATH = '../../../../data/ppo/model/' + prefix + 'ppo'
NUM_WORKERS = 1  # or: mp.cpu_count()
EVAL_INTERVAL = 100

(ACTION_RANGE, BATCH, GAMMA, RANDOMSEED, A_UPDATE_STEPS, C_UPDATE_STEPS, EPS,
 A_LR, C_LR, METHOD) = load_params('ppo', [
     'action_range', 'batch_size', 'gamma', 'random_seed', 'actor_update_steps',
     'critic_update_steps', 'eps', 'actor_lr', 'critic_lr', 'method'])

###############################  PPO  ####################################


class PPO(object):
    '''
    Proximal Policy Optimization agent: an actor (policy) network and a critic
    (value) network, with SharedAdam optimizers so optimizer state can be
    shared across worker processes.
    '''
    def __init__(self, state_space, action_space, hidden_dim=512):
        self.actor = PPO_PolicyNetwork(state_space,
                                       action_space,
                                       hidden_dim,
                                       action_range=ACTION_RANGE)
        self.critic = ValueNetwork(state_space, hidden_dim)
        self.actor_optimizer = SharedAdam(self.actor.parameters(), lr=A_LR)