Python Random.Random 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: ray.rllib.utils.exploration.random

클래스/타입: Random

메소드/함수: Random

hotexamples.com에서의 예제들: 4

Python Random.Random - 4개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 ray.rllib.utils.exploration.random.Random.Random에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

Random(4)

get_tf_exploration_action_op(3)

get_torch_exploration_action(3)

예제 #1

파일 보기

파일: stochastic_sampling.py 프로젝트: stefanbschneider/ray

    def __init__(self,
                 action_space: gym.spaces.Space,
                 *,
                 framework: str,
                 model: ModelV2,
                 random_timesteps: int = 0,
                 **kwargs):
        """Initializes a StochasticSampling Exploration object.

        Args:
            action_space (gym.spaces.Space): The gym action space used by the
                environment.
            framework (str): One of None, "tf", "torch".
            model (ModelV2): The ModelV2 used by the owning Policy.
            random_timesteps (int): The number of timesteps for which to act
                completely randomly. Only after this number of timesteps,
                actual samples will be drawn to get exploration actions.
        """
        assert framework is not None
        super().__init__(
            action_space, model=model, framework=framework, **kwargs)

        # Create the Random exploration module (used for the first n
        # timesteps).
        self.random_timesteps = random_timesteps
        self.random_exploration = Random(
            action_space, model=self.model, framework=self.framework, **kwargs)

        # The current timestep value (tf-var or python int).
        self.last_timestep = get_variable(
            np.array(0, np.int64),
            framework=self.framework,
            tf_name="timestep",
            dtype=np.int64)

예제 #2

파일 보기

def setup_early_mixins(
    policy: Policy,
    obs_space: gym.spaces.Space,
    action_space: gym.spaces.Space,
    config: TrainerConfigDict,
) -> None:
    """Call mixin classes' constructors before Policy's initialization.

    Adds the necessary optimizers to the given Policy.

    Args:
        policy (Policy): The Policy object.
        obs_space (gym.spaces.Space): The Policy's observation space.
        action_space (gym.spaces.Space): The Policy's action space.
        config (TrainerConfigDict): The Policy's config.
    """
    policy.cur_iter = 0
    ActorCriticOptimizerMixin.__init__(policy, config)
    if config["lagrangian"]:
        policy.model.log_alpha_prime = get_variable(0.0,
                                                    framework="tf",
                                                    trainable=True,
                                                    tf_name="log_alpha_prime")
        policy.alpha_prime_optim = tf.keras.optimizers.Adam(
            learning_rate=config["optimization"]["critic_learning_rate"], )
    # Generic random action generator for calculating CQL-loss.
    policy._random_action_generator = Random(
        action_space,
        model=None,
        framework="tf2",
        policy_config=config,
        num_workers=0,
        worker_index=0,
    )

예제 #3

파일 보기

    def __init__(self,
                 action_space: Space,
                 *,
                 framework: str,
                 model: ModelV2,
                 random_timesteps: int = 1000,
                 stddev: float = 0.1,
                 initial_scale: float = 1.0,
                 final_scale: float = 0.02,
                 scale_timesteps: int = 10000,
                 scale_schedule: Optional[Schedule] = None,
                 **kwargs):
        """Initializes a GaussianNoise Exploration object.

        Args:
            random_timesteps (int): The number of timesteps for which to act
                completely randomly. Only after this number of timesteps, the
                `self.scale` annealing process will start (see below).
            stddev (float): The stddev (sigma) to use for the
                Gaussian noise to be added to the actions.
            initial_scale (float): The initial scaling weight to multiply
                the noise with.
            final_scale (float): The final scaling weight to multiply
                the noise with.
            scale_timesteps (int): The timesteps over which to linearly anneal
                the scaling factor (after(!) having used random actions for
                `random_timesteps` steps.
            scale_schedule (Optional[Schedule]): An optional Schedule object
                to use (instead of constructing one from the given parameters).
        """
        assert framework is not None
        super().__init__(action_space,
                         model=model,
                         framework=framework,
                         **kwargs)

        # Create the Random exploration module (used for the first n
        # timesteps).
        self.random_timesteps = random_timesteps
        self.random_exploration = Random(action_space,
                                         model=self.model,
                                         framework=self.framework,
                                         **kwargs)

        self.stddev = stddev
        # The `scale` annealing schedule.
        self.scale_schedule = scale_schedule or PiecewiseSchedule(
            endpoints=[(random_timesteps, initial_scale),
                       (random_timesteps + scale_timesteps, final_scale)],
            outside_value=final_scale,
            framework=self.framework)

        # The current timestep value (tf-var or python int).
        self.last_timestep = get_variable(0,
                                          framework=self.framework,
                                          tf_name="timestep")

        # Build the tf-info-op.
        if self.framework in ["tf2", "tf", "tfe"]:
            self._tf_info_op = self.get_info()

예제 #4

파일 보기

    def __init__(self,
                 action_space,
                 *,
                 random_timesteps=1000,
                 stddev=0.1,
                 initial_scale=1.0,
                 final_scale=0.02,
                 scale_timesteps=10000,
                 scale_schedule=None,
                 framework="tf",
                 **kwargs):
        """Initializes a GaussianNoise Exploration object.

        Args:
            action_space (Space): The gym action space used by the environment.
            random_timesteps (int): The number of timesteps for which to act
                completely randomly. Only after this number of timesteps, the
                `self.scale` annealing process will start (see below).
            stddev (float): The stddev (sigma) to use for the
                Gaussian noise to be added to the actions.
            initial_scale (float): The initial scaling weight to multiply
                the noise with.
            final_scale (float): The final scaling weight to multiply
                the noise with.
            scale_timesteps (int): The timesteps over which to linearly anneal
                the scaling factor (after(!) having used random actions for
                `random_timesteps` steps.
            scale_schedule (Optional[Schedule]): An optional Schedule object
                to use (instead of constructing one from the given parameters).
            framework (Optional[str]): One of None, "tf", "torch".
        """
        assert framework is not None
        super().__init__(action_space, framework=framework, **kwargs)

        self.random_timesteps = random_timesteps
        self.random_exploration = Random(action_space,
                                         framework=self.framework,
                                         **kwargs)
        self.stddev = stddev
        # The `scale` annealing schedule.
        self.scale_schedule = scale_schedule or PiecewiseSchedule(
            endpoints=[(random_timesteps, initial_scale),
                       (random_timesteps + scale_timesteps, final_scale)],
            outside_value=final_scale,
            framework=self.framework)

        # The current timestep value (tf-var or python int).
        self.last_timestep = get_variable(0,
                                          framework=self.framework,
                                          tf_name="timestep")