def setUp(self) -> None:
    # environment, network and agent shared by the Reinforce tests
    self.env = ToTensor(gym.make("CartPole-v0"))
    self.obs_shape = self.env.observation_space.shape
    self.n_actions = self.env.action_space.n
    self.net = MLP(self.obs_shape, self.n_actions)
    self.agent = Agent(self.net)
    self.xp_stream = EpisodicExperienceStream(self.env, self.agent, device=Mock(), episodes=4)
    self.rl_dataloader = DataLoader(self.xp_stream)

    # build hparams from the shared CLI arguments plus the DQN-specific ones
    parent_parser = argparse.ArgumentParser(add_help=False)
    parent_parser = cli.add_base_args(parent=parent_parser)
    parent_parser = DQN.add_model_specific_args(parent_parser)
    args_list = [
        "--algo", "dqn",
        "--warm_start_steps", "500",
        "--episode_length", "100",
    ]
    self.hparams = parent_parser.parse_args(args_list)
    self.model = Reinforce(**vars(self.hparams))
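# A minimal sketch of how this fixture might be exercised; the test name and the
# single-batch check below are assumptions, not part of the original suite.
def test_dataloader_yields_batches(self) -> None:
    for batch in self.rl_dataloader:
        # the stream was built with episodes=4, so at least one batch of
        # episodic experience should come out of the DataLoader
        self.assertIsNotNone(batch)
        break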
def setUp(self) -> None:
    # environment, network and agent shared by the PolicyGradient tests
    self.env = ToTensor(gym.make("CartPole-v0"))
    self.obs_shape = self.env.observation_space.shape
    self.n_actions = self.env.action_space.n
    self.net = MLP(self.obs_shape, self.n_actions)
    self.agent = Agent(self.net)

    # build hparams from the shared CLI arguments plus the model-specific ones
    parent_parser = argparse.ArgumentParser(add_help=False)
    parent_parser = cli.add_base_args(parent=parent_parser)
    parent_parser = PolicyGradient.add_model_specific_args(parent_parser)
    args_list = [
        "--episode_length", "100",
        "--env", "CartPole-v0",
    ]
    self.hparams = parent_parser.parse_args(args_list)
    self.model = PolicyGradient(**vars(self.hparams))
def setUp(self) -> None:
    # environment, network, agent and experience source under test
    self.env = ToTensor(gym.make("CartPole-v0"))
    self.obs_shape = self.env.observation_space.shape
    self.n_actions = self.env.action_space.n
    self.net = MLP(self.obs_shape, self.n_actions)
    self.agent = Agent(self.net)
    self.exp_source = DiscountedExperienceSource(self.env, self.agent)

    # build hparams from the shared CLI arguments plus the DQN-specific ones
    parent_parser = argparse.ArgumentParser(add_help=False)
    parent_parser = cli.add_base_args(parent=parent_parser)
    parent_parser = DQN.add_model_specific_args(parent_parser)
    args_list = [
        "--algo", "dqn",
        "--warm_start_steps", "500",
        "--episode_length", "100",
        "--env", "CartPole-v0",
        "--batch_size", "32",
        "--gamma", "0.99",
    ]
    self.hparams = parent_parser.parse_args(args_list)
    self.model = Reinforce(**vars(self.hparams))
    self.rl_dataloader = self.model.train_dataloader()
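# A minimal sketch of a follow-up test built on this fixture. The test name, the
# use of next(iter(...)) to pull one batch, and the assumption that
# Reinforce.training_step returns a dict-like object with a "loss" entry follow
# the usual Lightning convention but are not part of the original suite.
def test_training_step_returns_loss(self) -> None:
    batch = next(iter(self.rl_dataloader))
    output = self.model.training_step(batch, 0)
    self.assertIn("loss", output)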
def __init__(self, env: str, gamma: float = 0.99, lr: float = 1e-4, batch_size: int = 32,
             entropy_beta: float = 0.01, batch_episodes: int = 4, *args, **kwargs) -> None:
    """
    PyTorch Lightning implementation of `Vanilla Policy Gradient
    <https://papers.nips.cc/paper/1713-policy-gradient-methods-for-reinforcement-learning-with-function-approximation.pdf>`_

    Paper authors: Richard S. Sutton, David McAllester, Satinder Singh, Yishay Mansour

    Model implemented by:

        - `Donal Byrne <https://github.com/djbyrne>`_

    Example:
        >>> from pl_bolts.models.rl.vanilla_policy_gradient_model import PolicyGradient
        ...
        >>> model = PolicyGradient("PongNoFrameskip-v4")

    Train::

        trainer = Trainer()
        trainer.fit(model)

    Args:
        env: gym environment tag
        gamma: discount factor
        lr: learning rate
        batch_size: size of minibatch pulled from the DataLoader
        batch_episodes: how many episodes to rollout for each batch of training
        entropy_beta: dictates the level of entropy per batch

    .. note::
        This example is based on:
        https://github.com/PacktPublishing/Deep-Reinforcement-Learning-Hands-On-Second-Edition/blob/master/Chapter11/04_cartpole_pg.py

    .. note:: Currently only supports CPU and single GPU training with `distributed_backend=dp`
    """
    super().__init__()

    # self.env = wrappers.make_env(self.hparams.env)  # use for Atari
    self.env = ToTensor(gym.make(env))  # use for Box2D/Control
    self.env.seed(123)

    self.obs_shape = self.env.observation_space.shape
    self.n_actions = self.env.action_space.n

    self.net = None
    self.build_networks()

    self.agent = PolicyAgent(self.net)
    self.source = NStepExperienceSource(env=self.env, agent=self.agent, n_steps=10)

    self.gamma = gamma
    self.lr = lr
    self.batch_size = batch_size
    self.batch_episodes = batch_episodes
    self.entropy_beta = entropy_beta
    self.baseline = 0

    # Metrics
    self.reward_sum = 0
    self.env_steps = 0
    self.total_steps = 0
    self.total_reward = 0
    self.episode_count = 0
    self.reward_list = []
    for _ in range(100):
        self.reward_list.append(torch.tensor(0, device=self.device))
    self.avg_reward = 0
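# For reference, a self-contained sketch of the discounted-return bookkeeping that
# gamma drives in vanilla policy gradient. This helper is illustrative only and is
# not a method of the class above.
def discount_rewards(rewards, gamma=0.99):
    """Compute the discounted return G_t for every step of one episode."""
    returns, running = [], 0.0
    for r in reversed(rewards):
        running = r + gamma * running  # G_t = r_t + gamma * G_{t+1}
        returns.append(running)
    return list(reversed(returns))

# e.g. discount_rewards([1, 1, 1]) -> [2.9701, 1.99, 1.0]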
def setUp(self) -> None:
    self.env = ToTensor(gym.make("CartPole-v0"))
    self.net = Mock()
    self.agent = Agent(self.net)
    self.xp_stream = EpisodicExperienceStream(self.env, self.agent, device=Mock(), episodes=4)
    self.rl_dataloader = DataLoader(self.xp_stream)
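# A minimal sketch of an assertion over this fixture. That each yielded batch
# holds one entry per requested episode (episodes=4) is an assumption about
# EpisodicExperienceStream's batch structure, not an original assertion.
def test_stream_yields_requested_episodes(self) -> None:
    batch = next(iter(self.rl_dataloader))
    self.assertEqual(len(batch), 4)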
def __init__(self, env: str, gamma: float = 0.99, lr: float = 1e-4, batch_size: int = 32,
             entropy_beta: float = 0.01, batch_episodes: int = 4, *args, **kwargs) -> None:
    """
    PyTorch Lightning implementation of `Vanilla Policy Gradient
    <https://papers.nips.cc/paper/1713-policy-gradient-methods-for-reinforcement-learning-with-function-approximation.pdf>`_

    Paper authors: Richard S. Sutton, David McAllester, Satinder Singh, Yishay Mansour

    Model implemented by:

        - `Donal Byrne <https://github.com/djbyrne>`_

    Example:
        >>> from pl_bolts.models.rl.vanilla_policy_gradient.model import PolicyGradient
        ...
        >>> model = PolicyGradient("PongNoFrameskip-v4")

    Train::

        trainer = Trainer()
        trainer.fit(model)

    Args:
        env: gym environment tag
        gamma: discount factor
        lr: learning rate
        batch_size: size of minibatch pulled from the DataLoader
        batch_episodes: how many episodes to rollout for each batch of training
        entropy_beta: dictates the level of entropy per batch
    """
    super().__init__()

    # self.env = wrappers.make_env(self.hparams.env)  # use for Atari
    self.env = ToTensor(gym.make(env))  # use for Box2D/Control
    self.env.seed(123)

    self.obs_shape = self.env.observation_space.shape
    self.n_actions = self.env.action_space.n

    self.net = None
    self.build_networks()

    self.agent = PolicyAgent(self.net)

    self.gamma = gamma
    self.lr = lr
    self.batch_size = batch_size
    self.batch_episodes = batch_episodes

    # Metrics
    self.total_reward = 0
    self.episode_reward = 0
    self.episode_count = 0
    self.episode_steps = 0
    self.total_episode_steps = 0

    self.entropy_beta = entropy_beta
    self.reward_list = []
    for _ in range(100):
        self.reward_list.append(0)
    self.avg_reward = 0
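# For reference, a short sketch of how an entropy bonus weighted by entropy_beta
# is typically folded into a policy-gradient loss. Illustrative only; this is not
# the class's actual training_step, and the function name is hypothetical.
import torch
import torch.nn.functional as F

def pg_loss_with_entropy(logits, actions, returns, entropy_beta=0.01):
    log_probs = F.log_softmax(logits, dim=1)
    # REINFORCE term: scale the log-prob of each taken action by its return
    picked = log_probs[torch.arange(len(actions)), actions]
    policy_loss = -(returns * picked).mean()
    # entropy of the current policy, added as an exploration bonus
    probs = F.softmax(logits, dim=1)
    entropy = -(probs * log_probs).sum(dim=1).mean()
    return policy_loss - entropy_beta * entropy

# e.g.:
# logits = torch.randn(3, 2)
# loss = pg_loss_with_entropy(logits, torch.tensor([0, 1, 0]), torch.tensor([1.0, 0.5, 0.25]))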
def setUp(self) -> None:
    self.env = ToTensor(gym.make("CartPole-v0"))