예제 #1
0
def get_ddpgtom_agent(env, agent_id, hidden_layer_sizes,
                      max_replay_buffer_size):
    """Construct a DDPG-ToM agent for one agent slot of a multi-agent env.

    The agent bundles a deterministic policy conditioned on predicted
    opponent actions, a centralized Q-function over the joint action
    space, an opponent-policy model, and an indexed replay buffer that
    also stores opponent actions.

    Args:
        env: environment exposing ``env_specs`` with per-agent
            observation/action spaces (project type — not stdlib).
        agent_id: index of the agent this instance controls.
        hidden_layer_sizes: hidden layer sizes shared by all networks.
        max_replay_buffer_size: capacity of the replay buffer.

    Returns:
        A configured ``DDPGToMAgent``.
    """
    specs = env.env_specs
    obs_space = specs.observation_space[agent_id]
    act_space = specs.action_space[agent_id]
    # Flattened dimension of all opponents' actions, hoisted since it is
    # needed by the policy, the opponent model, and the replay buffer.
    opp_dim = specs.action_space.opponent_flat_dim(agent_id)

    # Policy sees own observation plus (predicted) opponent actions.
    policy = DeterministicMLPPolicy(
        input_shapes=(obs_space.shape, (opp_dim, )),
        output_shape=act_space.shape,
        hidden_layer_sizes=hidden_layer_sizes,
        name='policy_agent_{}'.format(agent_id))

    # Centralized critic over the full joint action space.
    qf = MLPValueFunction(
        input_shapes=(obs_space.shape, (specs.action_space.flat_dim, )),
        output_shape=(1, ),
        hidden_layer_sizes=hidden_layer_sizes,
        name='qf_agent_{}'.format(agent_id))

    # Model of the opponents' joint action, conditioned on own observation.
    opponent_policy = DeterministicMLPPolicy(
        input_shapes=(obs_space.shape, ),
        output_shape=(opp_dim, ),
        hidden_layer_sizes=hidden_layer_sizes,
        name='opponent_policy_agent_{}'.format(agent_id))

    replay_buffer = IndexedReplayBuffer(
        observation_dim=obs_space.shape[0],
        action_dim=act_space.shape[0],
        opponent_action_dim=opp_dim,
        max_replay_buffer_size=max_replay_buffer_size)

    return DDPGToMAgent(
        env_specs=specs,
        policy=policy,
        qf=qf,
        opponent_policy=opponent_policy,
        replay_buffer=replay_buffer,
        exploration_strategy=OUExploration(act_space),
        gradient_clipping=10.,
        agent_id=agent_id,
    )
예제 #2
0
def get_ddpg_agent(env,
                   agent_id,
                   hidden_layer_sizes,
                   max_replay_buffer_size,
                   policy_type='dete'):
    """Construct a decentralized DDPG agent for one agent slot.

    Args:
        env: environment exposing ``env_specs`` with per-agent
            observation/action spaces (project type — not stdlib).
        agent_id: index of the agent this instance controls.
        hidden_layer_sizes: hidden layer sizes shared by all networks.
        max_replay_buffer_size: capacity of the replay buffer.
        policy_type: ``'dete'`` for a deterministic policy with OU
            exploration, or ``'gumble'`` for a relaxed-softmax
            (Gumbel-style) policy with no extra exploration noise.

    Returns:
        A configured ``DDPGAgent``.

    Raises:
        ValueError: if ``policy_type`` is not a recognized value.
    """
    observation_space = env.env_specs.observation_space[agent_id]
    action_space = env.env_specs.action_space[agent_id]
    if policy_type == 'dete':
        policy_fn = DeterministicMLPPolicy
        exploration_strategy = OUExploration(action_space)
    elif policy_type == 'gumble':
        policy_fn = RelaxedSoftmaxMLPPolicy
        exploration_strategy = None
    else:
        # Previously an unknown value fell through and crashed later with
        # an opaque UnboundLocalError; fail fast with a clear message.
        raise ValueError(
            "Unknown policy_type {!r}; expected 'dete' or 'gumble'.".format(
                policy_type))
    return DDPGAgent(
        env_specs=env.env_specs,
        policy=policy_fn(input_shapes=(observation_space.shape, ),
                         output_shape=action_space.shape,
                         hidden_layer_sizes=hidden_layer_sizes,
                         name='policy_agent_{}'.format(agent_id)),
        qf=MLPValueFunction(input_shapes=(observation_space.shape,
                                          action_space.shape),
                            output_shape=(1, ),
                            hidden_layer_sizes=hidden_layer_sizes,
                            name='qf_agent_{}'.format(agent_id)),
        replay_buffer=IndexedReplayBuffer(
            observation_dim=observation_space.shape[0],
            action_dim=action_space.shape[0],
            max_replay_buffer_size=max_replay_buffer_size),
        exploration_strategy=exploration_strategy,
        gradient_clipping=10.,
        agent_id=agent_id,
    )
예제 #3
0
def get_pr2_agent(env,
                  agent_id,
                  hidden_layer_sizes,
                  max_replay_buffer_size,
                  policy_type="deter"):
    """Construct a PR2 agent (policy with opponent-conditioned critic).

    Args:
        env: environment exposing ``env_specs`` with per-agent
            observation/action spaces (project type — not stdlib).
        agent_id: index of the agent this instance controls.
        hidden_layer_sizes: hidden layer sizes shared by all networks.
        max_replay_buffer_size: capacity of the replay buffer.
        policy_type: ``"dete"`` (or the historical default ``"deter"``)
            for a deterministic policy with OU exploration, or
            ``"gumble"`` for a relaxed-softmax (Gumbel-style) policy
            with no extra exploration noise.

    Returns:
        A configured ``PR2Agent``.

    Raises:
        ValueError: if ``policy_type`` is not a recognized value.
    """
    observation_space = env.env_specs.observation_space[agent_id]
    action_space = env.env_specs.action_space[agent_id]
    opponent_action_shape = (
        env.env_specs.action_space.opponent_flat_dim(agent_id), )
    # BUG FIX: the default policy_type "deter" matched neither branch, so
    # calling this function with defaults raised UnboundLocalError on
    # policy_fn. Accept both spellings to stay backward-compatible, and
    # fail fast on anything else. (A stray debug print was also removed.)
    if policy_type in ("dete", "deter"):
        policy_fn = DeterministicMLPPolicy
        exploration_strategy = OUExploration(action_space)
    elif policy_type == "gumble":
        policy_fn = RelaxedSoftmaxMLPPolicy
        exploration_strategy = None
    else:
        raise ValueError(
            "Unknown policy_type {!r}; expected 'dete'/'deter' or "
            "'gumble'.".format(policy_type))
    return PR2Agent(
        env_specs=env.env_specs,
        policy=policy_fn(
            input_shapes=(observation_space.shape, ),
            output_shape=action_space.shape,
            hidden_layer_sizes=hidden_layer_sizes,
            name="policy_agent_{}".format(agent_id),
        ),
        # Joint critic: own observation, own action, opponents' actions.
        qf=MLPValueFunction(
            input_shapes=(
                observation_space.shape,
                action_space.shape,
                opponent_action_shape,
            ),
            output_shape=(1, ),
            hidden_layer_sizes=hidden_layer_sizes,
            name="qf_agent_{}".format(agent_id),
        ),
        # Individual critic over own observation/action only.
        ind_qf=MLPValueFunction(
            input_shapes=(observation_space.shape, action_space.shape),
            output_shape=(1, ),
            hidden_layer_sizes=hidden_layer_sizes,
            name="ind_qf_agent_{}".format(agent_id),
        ),
        replay_buffer=IndexedReplayBuffer(
            observation_dim=observation_space.shape[0],
            action_dim=action_space.shape[0],
            max_replay_buffer_size=max_replay_buffer_size,
            opponent_action_dim=opponent_action_shape[0],
        ),
        # Opponent model conditioned on own observation and own action.
        opponent_policy=policy_fn(
            input_shapes=(observation_space.shape, action_space.shape),
            output_shape=opponent_action_shape,
            hidden_layer_sizes=hidden_layer_sizes,
            name="opponent_policy_agent_{}".format(agent_id),
        ),
        exploration_strategy=exploration_strategy,
        gradient_clipping=10.0,
        agent_id=agent_id,
    )
예제 #4
0
def get_commnet_agent(env,
                      agent_id,
                      hidden_layer_sizes,
                      max_replay_buffer_size,
                      policy_type="deter"):
    """Construct a fully-centralized CommNet agent over all ``n`` agents.

    The policy and value function consume the stacked observations (and
    actions) of every agent, so the replay buffer stores flattened
    ``n * dim`` vectors with per-agent rewards and terminals.

    Args:
        env: environment exposing ``env_specs`` with per-agent
            observation/action spaces and ``agent_num`` (project type).
        agent_id: index used for network naming.
        hidden_layer_sizes: hidden layer sizes shared by all networks.
        max_replay_buffer_size: capacity of the replay buffer.
        policy_type: ``"deter"`` for a deterministic policy with OU
            exploration, or ``"gumble"`` for a relaxed-softmax
            (Gumbel-style) policy with no extra exploration noise.

    Returns:
        A configured ``FullyCentralizedAgent``.

    Raises:
        ValueError: if ``policy_type`` is not a recognized value.
    """
    observation_space = env.env_specs.observation_space[agent_id]
    n = env.env_specs.agent_num
    action_space = env.env_specs.action_space[agent_id]
    if policy_type == "deter":
        policy_fn = DeterministicMLPPolicy
        exploration_strategy = OUExploration(action_space)
    elif policy_type == "gumble":
        policy_fn = RelaxedSoftmaxMLPPolicy
        exploration_strategy = None
    else:
        # Previously an unknown value fell through and crashed later with
        # an opaque UnboundLocalError; fail fast with a clear message.
        raise ValueError(
            "Unknown policy_type {!r}; expected 'deter' or 'gumble'.".format(
                policy_type))
    return FullyCentralizedAgent(
        env_specs=env.env_specs,
        policy=policy_fn(
            # Stacked observations of all n agents.
            input_shapes=((n, ) + observation_space.shape, ),
            output_shape=action_space.shape,
            hidden_layer_sizes=hidden_layer_sizes,
            name="policy_agent_{}".format(agent_id),
        ),
        qf=CommNetValueFunction(
            input_shapes=((n, ) + observation_space.shape,
                          (n, ) + action_space.shape),
            output_shape=(1, ),
            hidden_layer_sizes=hidden_layer_sizes,
            name="qf_agent_{}".format(agent_id),
        ),
        replay_buffer=IndexedReplayBuffer(
            # Flattened joint observation/action; one reward and terminal
            # flag per agent.
            observation_dim=n * observation_space.shape[0],
            action_dim=n * action_space.shape[0],
            max_replay_buffer_size=max_replay_buffer_size,
            reward_dim=n,
            terminal_dim=n,
        ),
        exploration_strategy=exploration_strategy,
        gradient_clipping=10.0,
        agent_id=agent_id,
    )