Example #1
def save_config(self):
    """
    Save the config as a YAML file in the root experiment directory.
    """
    folder = Path(self.session_config.folder)
    folder.mkdir(exist_ok=True, parents=True)
    config = Config(learner_config=self.learner_config,
                    env_config=self.env_config,
                    session_config=self.session_config)
    config.dump_file(str(folder / 'config.yml'))
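A note on Config from surreal.session, as used above: it behaves like an attribute-accessible dict that can be dumped to YAML. A minimal round-trip sketch using only the calls that appear in these examples (the folder path is made up for illustration):

from pathlib import Path
from surreal.session import Config

# Nested keys become attributes: cfg.folder, cfg.sender.flush_iteration.
cfg = Config({'folder': '/tmp/experiment', 'sender': {'flush_iteration': 100}})
Path(cfg.folder).mkdir(exist_ok=True, parents=True)
cfg.dump_file(str(Path(cfg.folder) / 'config.yml'))  # YAML dump, as in save_config above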
Example #2
def __init__(self, env, learner_config, session_config):
    """
    Default sender configs are in BASE_SESSION_CONFIG['sender'];
    they contain communication-level information.

    Algorithm-specific experience generation parameters should live in learner_config.
    """
    super().__init__(env)
    # TODO: initialize config in a unified place
    self.session_config = Config(session_config).extend(BASE_SESSION_CONFIG)
    self.learner_config = Config(learner_config).extend(BASE_LEARNER_CONFIG)
    host = os.environ['SYMPH_COLLECTOR_FRONTEND_HOST']
    port = os.environ['SYMPH_COLLECTOR_FRONTEND_PORT']
    self.sender = ExpSender(
        host=host,
        port=port,
        flush_iteration=self.session_config.sender.flush_iteration,
    )
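The extend pattern above layers user-supplied values over library defaults. A minimal sketch of the semantics this code relies on, assuming extend fills in keys missing from the user config (the 'compress' key below is made up for illustration):

from surreal.session import Config

BASE = Config({'sender': {'flush_iteration': 100, 'compress': True}})

# User value wins where given; missing keys fall back to the base.
cfg = Config({'sender': {'flush_iteration': 50}}).extend(BASE)
print(cfg.sender.flush_iteration)  # 50, from the user config
print(cfg.sender.compress)         # True, inherited from BASE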
Example #3
PPO_DEFAULT_LEARNER_CONFIG = Config({
    'model': {
        'convs': [],  # this can wait until TorchX
        'actor_fc_hidden_sizes': [300, 200],
        'critic_fc_hidden_sizes': [300, 200],
        'cnn_feature_dim': 256,
        'use_layernorm': False,
    },
    'algo': {
        # base configs
        # 'agent_class': 'PPOAgent',
        # 'learner_class': 'PPOLearner',
        # 'experience': 'ExpSenderWrapperMultiStepMovingWindowWithInfo',
        'use_z_filter': True,
        'use_r_filter': False,
        'gamma': .995,
        'n_step': 25,  # 10 when not using an RNN
        'stride': 20,  # 10 when not using an RNN
        'network': {
            'lr_actor': 1e-4,
            'lr_critic': 1e-4,
            'clip_actor_gradient': True,
            'actor_gradient_norm_clip': 5.,
            'clip_critic_gradient': True,
            'critic_gradient_norm_clip': 5.,
            'actor_regularization': 0.0,
            'critic_regularization': 0.0,
            'anneal': {
                'lr_scheduler': "LinearWithMinLR",
                'frames_to_anneal': 5e6,
                'lr_update_frequency': 100,
                'min_lr': 5e-5,
            },
        },

        # ppo specific parameters:
        'ppo_mode': 'adapt',
        'advantage': {
            'norm_adv': True,
            'lam': 0.97,
            'reward_scale': 1.0,
        },
        'rnn': {
            'if_rnn_policy': True,
            'rnn_hidden': 100,
            'rnn_layer': 1,
            'horizon': 5,
        },
        'consts': {
            'init_log_sig': -1.0,
            'log_sig_range': 0.25,
            'epoch_policy': 10,
            'epoch_baseline': 10,
            'adjust_threshold': (0.5, 2.0),  # threshold to magnify clip epsilon
            'kl_target': 0.015,  # target KL divergence between the policy before and after an update
        },
        'adapt_consts': {
            'kl_cutoff_coeff': 250,  # penalty coeff when kl large
            'beta_init': 1.0,  # original beta
            'beta_range': (1 / 35.0, 35.0),  # range of the adapted penalty factor
            'scale_constant': 1.5,
        },
        'clip_consts': {
            'clip_epsilon_init': 0.2,  # factor of clipped loss
            'clip_range': (0.05, 0.3),  # range of the adapted penalty factor
            'scale_constant': 1.2,
        },
    },
    'replay': {
        # 'replay_class': 'FIFOReplay',
        'batch_size': 64,
        'memory_size': 96,
        'sampling_start_size': 64,
        'replay_shards': 1,
    },
    'parameter_publish': {
        'exp_interval': 4096,
    },
})
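The anneal block above names a 'LinearWithMinLR' scheduler. Its actual implementation lives in surreal; a minimal sketch of what a linear-anneal-to-a-floor schedule computes, using the values from the config (linear_with_min_lr below is written for illustration, not taken from the library):

def linear_with_min_lr(frame, lr_init=1e-4, min_lr=5e-5, frames_to_anneal=5e6):
    """Linearly anneal from lr_init to min_lr over frames_to_anneal frames, then hold."""
    frac = min(frame / frames_to_anneal, 1.0)
    return lr_init + frac * (min_lr - lr_init)

print(linear_with_min_lr(0))      # 0.0001 at the start
print(linear_with_min_lr(2.5e6))  # 7.5e-05 halfway through the anneal
print(linear_with_min_lr(1e7))    # 5e-05, clamped at min_lr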
Example #4
DDPG_DEFAULT_LEARNER_CONFIG = Config({
    'model': {
        'convs': [],
        'actor_fc_hidden_sizes': [300, 200],
        'critic_fc_hidden_sizes': [400, 300],
        'use_layernorm': False,
        'conv_spec': {
            # First conv layer: 16 out channels, second layer 32 channels
            'out_channels': [16, 32],
            # First conv layer: kernel size 8, second layer kernel size 4
            'kernel_sizes': [8, 4],
            # First conv layer: stride=4, second layer stride=2
            'strides': [4, 2],
            # After the final conv layer, the output is flattened and fed
            # through an MLP with this output size
            'hidden_output_dim': 200,
        },
    },
    'algo': {
        'gamma': .99,
        # Unroll the bellman update
        'n_step': 3,
        # Send experiences every `stride` steps
        'stride': 1,
        'network': {
            'lr_actor': 1e-4,
            'lr_critic': 1e-3,
            'clip_actor_gradient': True,
            'actor_gradient_value_clip': 1.,
            'clip_critic_gradient': False,
            'critic_gradient_value_clip': 5.,
            # Weight regularization
            'actor_regularization': 0.0,
            'critic_regularization': 0.0,
            # beta version: see https://arxiv.org/pdf/1802.09477.pdf and
            # https://github.com/sfujim/TD3/blob/master/TD3.py
            # for action regularization and double critic algorithm details
            'use_action_regularization': False,
            'use_double_critic': False,
            'target_update': {
                # Soft: after every iteration,
                #   target_params = (1 - tau) * target_params + tau * params
                # (see the sketch after this config)
                # 'type': 'soft',
                # 'tau': 1e-3,
                # Hard: after `interval` iterations, target_params = params
                'type': 'hard',
                'interval': 500,
            },
        },
        'exploration': {
            # Beta implementation of parameter noise:
            # see https://blog.openai.com/better-exploration-with-parameter-noise/ for algorithm details
            'param_noise_type': None,

            # 'normal' applies gaussian noise over the agent's parameters:
            # 'param_noise_type': 'normal',

            # 'adaptive_normal' scales the noise sigma up or down to hit the
            # target action standard deviation:
            # 'param_noise_type': 'adaptive_normal',
            'param_noise_sigma': 0.05,
            'param_noise_alpha': 1.15,
            'param_noise_target_stddev': 0.005,

            # Vanilla noise: applies gaussian noise on every action
            'noise_type': 'normal',
            'max_sigma': 1.0,

            # Or, use Ornstein-Uhlenbeck noise instead of gaussian:
            # 'noise_type': 'ou_noise',
            'theta': 0.15,
            'dt': 1e-3,
        },
    },
    'replay': {
        'batch_size': 512,
        'memory_size': int(1000000 / 3),  # total replay size is memory_size * replay_shards
        'sampling_start_size': 3000,
        'replay_shards': 3,
    },
    'parameter_publish': {
        # Minimum amount of time (seconds) between two parameter publishes
        'min_publish_interval': 3,
    },
})
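The target_update block above supports the two modes described in its comments. A minimal numpy sketch of both rules, written for illustration (function and variable names are made up):

import numpy as np

def soft_update(target_params, params, tau=1e-3):
    # Every iteration: target <- (1 - tau) * target + tau * params
    return [(1 - tau) * t + tau * p for t, p in zip(target_params, params)]

def hard_update(target_params, params, iteration, interval=500):
    # Every `interval` iterations: target <- exact copy of params
    if iteration % interval == 0:
        return [p.copy() for p in params]
    return target_params

params = [np.ones(3)]
target = [np.zeros(3)]
target = soft_update(target, params)       # [array([0.001, 0.001, 0.001])]
target = hard_update(target, params, 500)  # exact copy of params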
Example #5
import numpy as np

from surreal.env.mujocomanip.default_env_configs import *
from surreal.env.mujocomanip.default_object_configs import *
from surreal.env.mujocomanip.mujocomanip_envs import *
from surreal.session import Config

env_config = DEFAULT_STACKER_CONFIG
env_config.display = True
object_configs = []
for _ in range(5):
    # Assigning DEFAULT_RANDOM_BOX_CONFIG directly doesn't work, presumably
    # because every object would share the same mutable Config instance;
    # deep-copy it via to_dict() instead.
    one_config = Config(DEFAULT_RANDOM_BOX_CONFIG.to_dict())
    one_config.seed = np.random.randint(100000)
    object_configs.append(one_config)
env_config.mujoco_objects_spec = object_configs
env_config.obs_spec.dim = 9 * len(object_configs) + 28
env = SurrealSawyerStackEnv(env_config)

while True:
    obs, info = env.reset()

    ### TODO: we should implement action clipping here ###
    # action[7] *= 0.020833
    for i in range(2000):
        action = np.random.randn(8) / 2
        action[7] = -1  # keep the gripper dimension fixed
        # Assumed gym-style step API, matching reset() returning (obs, info);
        # without this call the loop never advanced the simulation.
        obs, reward, done, info = env.step(action)
Example #6
import numpy as np
import imageio
from surreal.env import make_env
from surreal.session import Config

env_config = Config({
    'env_name': 'mujocomanip:BaxterLiftEnv',
    'pixel_input': True,
    'frame_stacks': 3,
    'sleep_time': 0.0,
    # 'limit_episode_length': 200, # 0 means no limit
    'limit_episode_length': 1000,  # 0 means no limit
    'video': {
        'record_video': True,
        'save_folder': None,
        'max_videos': 500,
        'record_every': 100,
    },
    'observation': {
        'pixel': ['camera0', 'depth'],
        # if using ObservationConcatWrapper, low_dim inputs will be concatenated into 'flat_inputs'
        # 'low_dim':['position', 'velocity', 'proprio', 'cube_pos', 'cube_quat', 'gripper_to_cube'],
        'low_dim': ['position', 'velocity', 'proprio'],
    },
})

writer = imageio.get_writer('baxter_lift.mp4', fps=20)
env, env_config = make_env(env_config)

obs = env.reset()
for i in range(1000):
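    # The original example is truncated here. A minimal continuation, assuming
    # a gym-style action_space and step() returning (obs, reward, done, info),
    # and that the camera frame sits under obs['pixel']['camera0']; these are
    # assumptions, and stacked frames may need reshaping before writing.
    action = np.random.randn(*env.action_space.shape)
    obs, reward, done, info = env.step(action)
    writer.append_data(obs['pixel']['camera0'])
    if done:
        obs = env.reset()
writer.close()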