def experiment(variant):
    """Launch DDPG on SawyerXYZReachingEnv.

    Builds the env, a [400, 300] MLP Q-function and tanh policy, wraps the
    policy with OU exploration noise, and runs ``DDPG.train()``.

    :param variant: dict with keys ``'env_params'`` (kwargs for the env
        constructor) and ``'algo_params'`` (kwargs forwarded to ``DDPG``).
    """
    env = SawyerXYZReachingEnv(**variant['env_params'])
    obs_dim = env.observation_space.low.size
    act_dim = env.action_space.low.size

    q_function = FlattenMlp(
        input_size=obs_dim + act_dim,  # Q(s, a) takes the concatenation
        output_size=1,
        hidden_sizes=[400, 300],
    )
    ddpg_policy = TanhMlpPolicy(
        input_size=obs_dim,
        output_size=act_dim,
        hidden_sizes=[400, 300],
    )
    # Exploration = deterministic policy + Ornstein-Uhlenbeck action noise.
    noise = OUStrategy(action_space=env.action_space)
    noisy_policy = PolicyWrappedWithExplorationStrategy(
        exploration_strategy=noise,
        policy=ddpg_policy,
    )
    algorithm = DDPG(
        env,
        qf=q_function,
        policy=ddpg_policy,
        exploration_policy=noisy_policy,
        **variant['algo_params']
    )
    if ptu.gpu_enabled():
        algorithm.cuda()
    algorithm.train()
def example(variant):
    """Launch DDPG with env/strategy/policy classes chosen via ``variant``.

    :param variant: dict with keys ``'env_class'``/``'env_params'``,
        ``'es_class'``/``'es_params'`` (exploration strategy),
        ``'policy_class'``, ``'use_gpu'``, and ``'algo_params'``
        (kwargs forwarded to ``DDPG``).
    """
    env_class = variant['env_class']
    env_params = variant['env_params']
    env = env_class(**env_params)
    # BUG FIX: ``normalize`` returns a wrapping env (it does not mutate its
    # argument in place); the original discarded the return value, so
    # everything below trained on the raw, unnormalized env.
    env = normalize(env)
    es_class = variant['es_class']
    es_params = dict(action_space=env.action_space, **variant['es_params'])
    use_gpu = variant['use_gpu']
    es = es_class(**es_params)
    qf = FeedForwardQFunction(
        int(env.observation_space.flat_dim),
        int(env.action_space.flat_dim),
        100,  # fc1 size — matches the policy's hidden widths below
        100,  # fc2 size
    )
    policy_class = variant['policy_class']
    policy_params = dict(
        obs_dim=get_dim(env.observation_space),
        action_dim=get_dim(env.action_space),
        fc1_size=100,
        fc2_size=100,
    )
    policy = policy_class(**policy_params)
    exploration_policy = PolicyWrappedWithExplorationStrategy(
        exploration_strategy=es,
        policy=policy,
    )
    algorithm = DDPG(
        env,
        qf=qf,
        policy=policy,
        exploration_policy=exploration_policy,
        **variant['algo_params'],
    )
    # GPU only when both requested by the variant AND actually available.
    if use_gpu and ptu.gpu_enabled():
        algorithm.cuda()
    algorithm.train()
def experiment(variant):
    """Launch DDPG on SawyerXYZReachingEnv with configurable hidden sizes.

    :param variant: dict with keys ``'env_params'``, ``'es_params'`` (extra
        OU-strategy kwargs), ``'hidden_sizes'`` (single width used for both
        hidden layers), and ``'algo_params'`` (kwargs forwarded to ``DDPG``).
        NOTE: when ``env_params['relative_pos_control']`` is set,
        ``variant['algo_params']`` is overridden *in place* with short
        path/epoch lengths.
    """
    env = SawyerXYZReachingEnv(**variant['env_params'])
    ou_strategy = OUStrategy(
        action_space=env.action_space,
        **variant['es_params']
    )

    width = variant['hidden_sizes']
    layer_widths = [width, width]  # two hidden layers of the same width
    obs_dim = env.observation_space.low.size
    act_dim = env.action_space.low.size

    q_function = FlattenMlp(
        input_size=obs_dim + act_dim,
        output_size=1,
        hidden_sizes=layer_widths,
    )
    ddpg_policy = TanhMlpPolicy(
        input_size=obs_dim,
        output_size=act_dim,
        hidden_sizes=layer_widths,
    )
    noisy_policy = PolicyWrappedWithExplorationStrategy(
        exploration_strategy=ou_strategy,
        policy=ddpg_policy,
    )

    # Relative position control uses very short episodes; override the
    # schedule in the shared variant dict (side effect visible to caller,
    # preserved from the original).
    if variant['env_params']['relative_pos_control']:
        variant['algo_params']['max_path_length'] = 3
        variant['algo_params']['num_steps_per_epoch'] = 15
        variant['algo_params']['num_steps_per_eval'] = 15

    algorithm = DDPG(
        env,
        qf=q_function,
        policy=ddpg_policy,
        exploration_policy=noisy_policy,
        **variant['algo_params']
    )
    if ptu.gpu_enabled():
        algorithm.cuda()
    algorithm.train()