def train_config(self, gpu):
    disable_view_window()
    c = Config()
    # Note: online policy algorithms such as PPO and A2C do not
    # work well in Pendulum (reason unknown)
    # and MountainCarContinuous (sparse returns)
    c.env_name = "CartPole-v0"
    c.env = unwrap_time_limit(gym.make(c.env_name))
    c.observe_dim = 4
    c.action_num = 2
    # maximum and minimum of reward value
    # since reward is 1 for every step, the maximum q value should be
    # below 20 (reward_future_steps) * (1 + discount ** n_steps) < 40
    c.value_max = 40
    c.value_min = 0
    c.reward_future_steps = 20
    c.max_episodes = 1000
    c.max_steps = 200
    c.replay_size = 100000
    # RAINBOW is not very stable (without dueling and noisy linear)
    # compared to other DQNs
    c.solved_reward = 180
    c.solved_repeat = 5
    c.device = gpu
    return c
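# A small standalone sketch (not part of the fixture above) that just evaluates
# the bound from the comment: since the reward is 1 per step, the fixture caps
# Q values at reward_future_steps * (1 + discount ** n_steps) < 40.
# The discount of 0.99 and the helper name are assumptions made only for this
# illustration.
def _value_bound_sketch(reward_future_steps=20, discount=0.99):
    # 20 * (1 + 0.99 ** 20) ~= 36.4, so value_max = 40 is a safe upper bound
    return reward_future_steps * (1 + discount ** reward_future_steps)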
def train_config(self):
    disable_view_window()
    c = Config()
    # the cooperative environment provided in
    # https://github.com/openai/multiagent-particle-envs
    c.env_name = "simple_spread"
    c.env = create_env(c.env_name)
    c.env.discrete_action_input = True
    c.agent_num = 3
    c.action_num = c.env.action_space[0].n
    c.observe_dim = c.env.observation_space[0].shape[0]
    # for continuous tests
    c.test_action_dim = 5
    c.test_action_range = 1
    c.test_observe_dim = 5
    c.test_agent_num = 3
    c.max_episodes = 1000
    c.max_steps = 200
    c.replay_size = 100000
    # from https://github.com/wsjeon/maddpg-rllib/tree/master/plots
    # PROBLEM: I have no idea how they calculate the rewards,
    # so I cannot replicate their reward curve
    c.solved_reward = -15
    c.solved_repeat = 5
    return c
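# A rough standalone sketch of the per-agent interface the fixture above relies
# on; it follows the README of https://github.com/openai/multiagent-particle-envs
# (make_env must be importable from that repo). This is an illustration with an
# assumed helper name, not the test suite's own create_env helper.
def _simple_spread_sketch():
    from make_env import make_env

    env = make_env("simple_spread")
    env.discrete_action_input = True  # actions are passed as integer indices
    obs_n = env.reset()               # one observation per agent (3 agents here)
    actions = [0 for _ in obs_n]      # index 0 is a "do nothing" action
    obs_n, reward_n, done_n, info_n = env.step(actions)
    return reward_n                   # one reward per agent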
def train_config(self):
    disable_view_window()
    c = Config()
    c.env_name = "Pendulum-v0"
    c.env = unwrap_time_limit(gym.make(c.env_name))
    c.observe_dim = 3
    c.action_dim = 1
    c.action_range = 2
    c.max_episodes = 1000
    c.max_steps = 200
    c.replay_size = 100000
    c.solved_reward = -400
    c.solved_repeat = 5
    return c
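# A standalone sketch (an illustrative check, not part of the fixture) showing
# where the Pendulum-v0 numbers above come from: the observation is
# (cos(theta), sin(theta), theta_dot) and the single torque action lies in
# [-2, 2], hence observe_dim = 3, action_dim = 1 and action_range = 2.
def _pendulum_space_sketch():
    import gym

    env = gym.make("Pendulum-v0")
    assert env.observation_space.shape == (3,)
    assert env.action_space.shape == (1,)
    assert float(env.action_space.high[0]) == 2.0
    return env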
def train_config(self):
    disable_view_window()
    c = Config()
    # Note: online policy algorithms such as PPO and A2C do not
    # work well in Pendulum (reason unknown)
    # and MountainCarContinuous (sparse returns)
    c.env_name = "CartPole-v0"
    c.env = unwrap_time_limit(gym.make(c.env_name))
    c.observe_dim = 4
    c.action_num = 2
    c.max_episodes = 2000  # the actor learns a little bit slower
    c.max_steps = 200
    c.replay_size = 10000
    c.solved_reward = 150
    c.solved_repeat = 5
    return c
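# A rough standalone sketch of how the solved_reward / solved_repeat fields in
# these fixtures are typically consumed: count consecutive episodes whose total
# reward reaches the threshold and stop once solved_repeat is hit. The
# random-action policy is a placeholder assumption, not the agent under test.
def _solved_check_sketch(max_episodes=2000, max_steps=200,
                         solved_reward=150, solved_repeat=5):
    import gym

    env = gym.make("CartPole-v0")
    consecutive = 0
    for _ in range(max_episodes):
        total_reward, state, done = 0.0, env.reset(), False
        for _ in range(max_steps):
            state, reward, done, _ = env.step(env.action_space.sample())
            total_reward += reward
            if done:
                break
        consecutive = consecutive + 1 if total_reward >= solved_reward else 0
        if consecutive >= solved_repeat:
            return True
    return False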
def train_config(self, pytestconfig):
    disable_view_window()
    c = Config()
    c.env_name = "Pendulum-v0"
    c.env = unwrap_time_limit(gym.make(c.env_name))
    c.observe_dim = 3
    c.action_dim = 1
    c.action_range = 2
    c.max_episodes = 1000
    c.max_steps = 200
    c.noise_param = (0, 0.2)
    c.noise_mode = "normal"
    c.noise_interval = 2
    c.replay_size = 100000
    c.solved_reward = -150
    c.solved_repeat = 5
    c.device = "cpu"
    return c
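# A standalone sketch (an assumption, not the library's implementation) of what
# the noise settings above describe: zero-mean Gaussian noise with std 0.2 is
# periodically added to the deterministic action and the result is clipped back
# into [-action_range, action_range]. Whether the interval counts steps or
# episodes is left open by the fixture; this sketch counts steps.
def _noisy_action_sketch(action, step,
                         noise_param=(0, 0.2), noise_interval=2, action_range=2):
    import numpy as np

    if step % noise_interval == 0:
        action = action + np.random.normal(noise_param[0], noise_param[1],
                                           size=np.shape(action))
    return np.clip(action, -action_range, action_range)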