def __init__(
    self,
    network_type,
    env,
    timesteps_per_actorbatch=1000,
    gamma=0.99,
    actor_batch_size=4,
    epochs=1000,
    lr_policy=0.01,
    lr_value=0.0005,
    policy_copy_interval=20,
    pretrained=None,
    layers=(32, 32),
    tensorboard_log=None,
    seed=None,
    render=False,
    device="cpu",
    run_num=None,
    save_model=None,
    save_interval=50,
):
    """Store the configuration, select a device, seed, and build the model.

    Every argument other than ``device`` is stored unchanged on the
    instance under its own name; this constructor does not interpret the
    hyperparameters any further.

    Args:
        device: Device name or ``torch.device``. The requested device is
            used only when its name contains ``"cuda"`` and
            ``torch.cuda.is_available()`` is true; otherwise the CPU
            device is selected.
        seed: When not ``None``, ``set_seeds(seed, env)`` is called.
        tensorboard_log: When not ``None``, a ``SummaryWriter`` is created
            with this value as ``log_dir`` and stored as ``self.writer``;
            otherwise ``self.writer`` is ``None``.
    """
    self.network_type = network_type
    self.env = env
    self.timesteps_per_actorbatch = timesteps_per_actorbatch
    self.gamma = gamma
    self.actor_batch_size = actor_batch_size
    self.epochs = epochs
    self.lr_policy = lr_policy
    self.lr_value = lr_value
    self.tensorboard_log = tensorboard_log
    self.seed = seed
    self.render = render
    self.policy_copy_interval = policy_copy_interval
    self.save_interval = save_interval
    self.pretrained = pretrained
    self.layers = layers
    self.run_num = run_num
    self.save_model = save_model

    # Helpers resolved from the surrounding scope, exposed as attributes.
    self.evaluate = evaluate
    self.save = save_params
    self.load = load_params

    # Assign device. ``str(device)`` lets a ``torch.device`` instance be
    # passed as well as a plain string (``in`` on a torch.device raises).
    if "cuda" in str(device) and torch.cuda.is_available():
        self.device = torch.device(device)
    else:
        self.device = torch.device("cpu")

    # Assign seed
    if seed is not None:
        set_seeds(seed, self.env)

    # Create the tensorboard writer only when a log directory is given.
    self.writer = None
    if self.tensorboard_log is not None:  # pragma: no cover
        from torch.utils.tensorboard import SummaryWriter

        self.writer = SummaryWriter(log_dir=self.tensorboard_log)

    self.create_model()
def __init__(
    self,
    network_type: str,
    env: Union[gym.Env, VecEnv],
    gamma: float = 0.99,
    replay_size: int = 1000000,
    batch_size: int = 100,
    lr_p: float = 0.0001,
    lr_q: float = 0.001,
    polyak: float = 0.995,
    epochs: int = 100,
    start_steps: int = 10000,
    steps_per_epoch: int = 4000,
    noise: Optional[Any] = None,
    noise_std: float = 0.1,
    max_ep_len: int = 1000,
    start_update: int = 1000,
    update_interval: int = 50,
    layers: Tuple = (32, 32),
    seed: Optional[int] = None,
    render: bool = False,
    device: Union[torch.device, str] = "cpu",
):
    """Store the configuration, select a device, seed, and build the model.

    Every argument other than ``device`` is stored unchanged on the
    instance under its own name; this constructor does not interpret the
    hyperparameters any further.

    Args:
        device: Device name or ``torch.device``. The requested device is
            used only when its name contains ``"cuda"`` and
            ``torch.cuda.is_available()`` is true; otherwise the CPU
            device is selected.
        seed: When not ``None``, ``set_seeds(seed, env)`` is called.
    """
    self.network_type = network_type
    self.env = env
    self.gamma = gamma
    self.replay_size = replay_size
    self.batch_size = batch_size
    self.lr_p = lr_p
    self.lr_q = lr_q
    self.polyak = polyak
    self.epochs = epochs
    self.start_steps = start_steps
    self.steps_per_epoch = steps_per_epoch
    self.noise = noise
    self.noise_std = noise_std
    self.max_ep_len = max_ep_len
    self.start_update = start_update
    self.update_interval = update_interval
    self.layers = layers
    self.seed = seed
    self.render = render

    # Assign device. The signature allows a ``torch.device``, which does
    # not support ``in``; compare against its string form instead.
    if "cuda" in str(device) and torch.cuda.is_available():
        self.device = torch.device(device)
    else:
        self.device = torch.device("cpu")

    # Assign seed
    if seed is not None:
        set_seeds(seed, self.env)

    # No writer is attached here; logs are reset before the model is built.
    self.writer = None
    self.empty_logs()
    self.create_model()
def __init__(
    self,
    network_type: str,
    env: Union[gym.Env, VecEnv],
    gamma: float = 0.99,
    replay_size: int = 1000000,
    batch_size: int = 256,
    lr: float = 3e-4,
    alpha: float = 0.01,
    polyak: float = 0.995,
    entropy_tuning: bool = True,
    epochs: int = 1000,
    start_steps: int = 0,
    steps_per_epoch: int = 1000,
    max_ep_len: int = 1000,
    start_update: int = 256,
    update_interval: int = 1,
    layers: Tuple = (256, 256),
    seed: Optional[int] = None,
    render: bool = False,
    device: Union[torch.device, str] = "cpu",
):
    """Store the configuration, select a device, seed, and build the model.

    Every argument other than ``device`` is stored unchanged on the
    instance under its own name; this constructor does not interpret the
    hyperparameters any further.

    Args:
        device: Device name or ``torch.device``. The requested device is
            used only when its name contains ``"cuda"`` and
            ``torch.cuda.is_available()`` is true; otherwise the CPU
            device is selected.
        seed: When not ``None``, ``set_seeds(seed, env)`` is called.
    """
    self.network_type = network_type
    self.env = env
    self.gamma = gamma
    self.replay_size = replay_size
    self.batch_size = batch_size
    self.lr = lr
    self.alpha = alpha
    self.polyak = polyak
    self.entropy_tuning = entropy_tuning
    self.epochs = epochs
    self.start_steps = start_steps
    self.steps_per_epoch = steps_per_epoch
    self.max_ep_len = max_ep_len
    self.start_update = start_update
    self.update_interval = update_interval
    self.layers = layers
    self.seed = seed
    self.render = render

    # Assign device. The signature allows a ``torch.device``, which does
    # not support ``in``; compare against its string form instead.
    if "cuda" in str(device) and torch.cuda.is_available():
        self.device = torch.device(device)
    else:
        self.device = torch.device("cpu")

    # Assign seed
    if seed is not None:
        set_seeds(seed, self.env)

    # No writer is attached here; logs are reset before the model is built.
    self.writer = None
    self.empty_logs()
    self.create_model()
def __init__(
    self,
    agent,
    env,
    logger,
    buffer=None,
    off_policy=False,
    save_interval=0,
    render=False,
    max_ep_len=1000,
    distributed=False,
    ckpt_log_name="experiment",
    steps_per_epoch=4000,
    epochs=10,
    device="cpu",
    log_interval=10,
    batch_size=50,
    seed=None,
    deterministic_actions=False,
    transform=None,
    history_length=4,
):
    """Store the training configuration and resolve the replay buffer.

    Apart from ``buffer``, ``distributed`` and ``seed``, each argument is
    stored unchanged on the instance under its own name (``device`` is
    kept exactly as passed, not converted).

    Args:
        buffer: Replay buffer to use. Stored as ``self.buffer``. When it is
            ``None`` and ``off_policy`` is true, ``agent.replay_buffer`` is
            used instead.
        off_policy: Enables the replay-buffer fallback and check above.
        distributed: Accepted for interface compatibility; this
            constructor does not use or store it.
        seed: When not ``None``, ``set_seeds(seed, env)`` is called.

    Raises:
        Exception: If ``off_policy`` is true, no ``buffer`` was given and
            ``agent.replay_buffer`` is ``None``.
    """
    self.agent = agent
    self.env = env
    self.logger = logger
    self.off_policy = off_policy

    # Always define ``self.buffer``: previously an explicitly supplied
    # buffer was dropped and the attribute stayed undefined.
    self.buffer = buffer
    if self.off_policy and self.buffer is None:
        if self.agent.replay_buffer is None:
            raise Exception("Off Policy Training requires a Replay Buffer")
        self.buffer = self.agent.replay_buffer

    self.save_interval = save_interval
    self.render = render
    self.max_ep_len = max_ep_len
    self.ckpt_log_name = ckpt_log_name
    self.steps_per_epoch = steps_per_epoch
    self.epochs = epochs
    self.device = device
    self.log_interval = log_interval
    self.batch_size = batch_size
    self.deterministic_actions = deterministic_actions
    self.transform = transform
    self.history_length = history_length

    if seed is not None:
        set_seeds(seed, self.env)
def __init__(
    self,
    network_type,
    env,
    gamma=0.99,
    replay_size=1000000,
    batch_size=256,
    lr=3e-4,
    alpha=0.01,
    polyak=0.995,
    entropy_tuning=True,
    epochs=1000,
    start_steps=0,
    steps_per_epoch=1000,
    max_ep_len=1000,
    start_update=256,
    update_interval=1,
    layers=(256, 256),
    pretrained=None,
    tensorboard_log=None,
    seed=None,
    render=False,
    device="cpu",
    run_num=None,
    save_model=None,
    save_interval=5000,
):
    """Store the configuration, select a device, seed, and build the model.

    Every argument other than ``device`` is stored unchanged on the
    instance under its own name; this constructor does not interpret the
    hyperparameters any further.

    Args:
        device: Device name or ``torch.device``. The requested device is
            used only when its name contains ``"cuda"`` and
            ``torch.cuda.is_available()`` is true; otherwise the CPU
            device is selected.
        seed: When not ``None``, ``set_seeds(seed, env)`` is called.
        tensorboard_log: When not ``None``, a ``SummaryWriter`` is created
            with this value as ``log_dir`` and stored as ``self.writer``;
            otherwise ``self.writer`` is ``None``.
    """
    self.network_type = network_type
    self.env = env
    self.gamma = gamma
    self.replay_size = replay_size
    self.batch_size = batch_size
    self.lr = lr
    self.alpha = alpha
    self.polyak = polyak
    self.entropy_tuning = entropy_tuning
    self.epochs = epochs
    self.start_steps = start_steps
    self.steps_per_epoch = steps_per_epoch
    self.max_ep_len = max_ep_len
    self.start_update = start_update
    self.update_interval = update_interval
    self.save_interval = save_interval
    self.layers = layers
    self.pretrained = pretrained
    self.tensorboard_log = tensorboard_log
    self.seed = seed
    self.render = render
    self.run_num = run_num
    self.save_model = save_model

    # Helpers resolved from the surrounding scope, exposed as attributes.
    self.save = save_params
    self.load = load_params
    self.evaluate = evaluate

    # Assign device. ``str(device)`` lets a ``torch.device`` instance be
    # passed as well as a plain string (``in`` on a torch.device raises).
    if "cuda" in str(device) and torch.cuda.is_available():
        self.device = torch.device(device)
    else:
        self.device = torch.device("cpu")

    # Assign seed
    if seed is not None:
        set_seeds(seed, self.env)

    # Create the tensorboard writer only when a log directory is given.
    self.writer = None
    if self.tensorboard_log is not None:  # pragma: no cover
        from torch.utils.tensorboard import SummaryWriter

        self.writer = SummaryWriter(log_dir=self.tensorboard_log)

    self.create_model()
def __init__(
    self,
    network_type,
    env,
    double_dqn=False,
    dueling_dqn=False,
    noisy_dqn=False,
    categorical_dqn=False,
    prioritized_replay=False,
    epochs=100,
    max_iterations_per_epoch=100,
    max_ep_len=1000,
    gamma=0.99,
    lr=0.001,
    batch_size=32,
    replay_size=100,
    prioritized_replay_alpha=0.6,
    max_epsilon=1.0,
    min_epsilon=0.01,
    epsilon_decay=1000,
    num_atoms=51,
    Vmin=-10,
    Vmax=10,
    tensorboard_log=None,
    seed=None,
    render=False,
    device="cpu",
    save_interval=5000,
    pretrained=None,
    run_num=None,
    save_model=None,
    transform=None,
):
    """Store the configuration, select a device, seed, and build the model.

    Arguments are stored unchanged on the instance under their own names,
    with these exceptions: ``epochs`` is stored as ``self.max_epochs``,
    ``seed`` is only forwarded to ``set_seeds`` and ``device`` is resolved
    as described below. ``self.loss_hist`` and ``self.reward_hist`` start
    as empty lists and ``self.history_length`` starts as ``None``.

    Args:
        device: Device name or ``torch.device``. The requested device is
            used only when its name contains ``"cuda"`` and
            ``torch.cuda.is_available()`` is true; otherwise the CPU
            device is selected.
        seed: When not ``None``, ``set_seeds(seed, env)`` is called.
        tensorboard_log: When not ``None``, a ``SummaryWriter`` is created
            with this value as ``log_dir`` and stored as ``self.writer``;
            otherwise ``self.writer`` is ``None``.
    """
    self.env = env
    self.double_dqn = double_dqn
    self.dueling_dqn = dueling_dqn
    self.noisy_dqn = noisy_dqn
    self.categorical_dqn = categorical_dqn
    self.prioritized_replay = prioritized_replay
    self.max_epochs = epochs
    self.max_iterations_per_epoch = max_iterations_per_epoch
    self.max_ep_len = max_ep_len
    self.replay_size = replay_size
    self.prioritized_replay_alpha = prioritized_replay_alpha
    self.lr = lr
    self.gamma = gamma
    self.batch_size = batch_size
    self.num_atoms = num_atoms
    self.Vmin = Vmin
    self.Vmax = Vmax
    self.tensorboard_log = tensorboard_log
    self.render = render
    self.loss_hist = []
    self.reward_hist = []
    self.max_epsilon = max_epsilon
    self.min_epsilon = min_epsilon
    self.epsilon_decay = epsilon_decay
    self.run_num = run_num
    self.save_model = save_model
    self.save_interval = save_interval
    self.pretrained = pretrained
    self.network_type = network_type
    self.history_length = None
    self.transform = transform

    # Helpers resolved from the surrounding scope, exposed as attributes.
    self.evaluate = evaluate
    self.save = save_params
    self.load = load_params

    # Assign device. ``str(device)`` lets a ``torch.device`` instance be
    # passed as well as a plain string (``in`` on a torch.device raises).
    if "cuda" in str(device) and torch.cuda.is_available():
        self.device = torch.device(device)
    else:
        self.device = torch.device("cpu")

    # Assign seed
    if seed is not None:
        set_seeds(seed, self.env)

    # Create the tensorboard writer only when a log directory is given.
    self.writer = None
    if self.tensorboard_log is not None:  # pragma: no cover
        from torch.utils.tensorboard import SummaryWriter

        self.writer = SummaryWriter(log_dir=self.tensorboard_log)

    self.create_model()
def __init__(
    self,
    network_type: str,
    env: Union[gym.Env, venv],
    gamma: float = 0.99,
    actor_batch_size: int = 64,
    lr_actor: float = 0.01,
    lr_critic: float = 0.1,
    num_episodes: int = 100,
    timesteps_per_actorbatch: int = 4000,
    max_ep_len: int = 1000,
    layers: Tuple = (32, 32),
    noise: Optional[Any] = None,
    noise_std: float = 0.1,
    tensorboard_log: Optional[str] = None,
    seed: Optional[int] = None,
    render: bool = False,
    device: Union[torch.device, str] = "cpu",
    run_num: Optional[int] = None,
    save_model: Optional[str] = None,
    save_interval: int = 1000,
):
    """Store the configuration, select a device, seed, and build the model.

    Every argument other than ``device`` is stored unchanged on the
    instance under its own name; this constructor does not interpret the
    hyperparameters any further.

    Args:
        device: Device name or ``torch.device``. The requested device
            (including any index such as ``"cuda:1"``) is used only when
            its name contains ``"cuda"`` and ``torch.cuda.is_available()``
            is true; otherwise the CPU device is selected.
        seed: When not ``None``, ``set_seeds(seed, env)`` is called.
        tensorboard_log: When not ``None``, a ``SummaryWriter`` is created
            with this value as ``log_dir`` and stored as ``self.writer``;
            otherwise ``self.writer`` is ``None``.
        save_model: Stored as ``self.save_model``.
    """
    self.network_type = network_type
    self.env = env
    self.gamma = gamma
    self.actor_batch_size = actor_batch_size
    self.lr_actor = lr_actor
    self.lr_critic = lr_critic
    self.num_episodes = num_episodes
    self.timesteps_per_actorbatch = timesteps_per_actorbatch
    self.max_ep_len = max_ep_len
    self.layers = layers
    self.noise = noise
    self.noise_std = noise_std
    self.tensorboard_log = tensorboard_log
    self.seed = seed
    self.render = render
    self.run_num = run_num
    self.save_interval = save_interval
    # Keep the caller's value; it used to be overwritten with None.
    self.save_model = save_model

    # Helpers resolved from the surrounding scope, exposed as attributes.
    self.save = save_params
    self.load = load_params

    # Assign device. Compare against the string form so a ``torch.device``
    # is accepted, and honour the requested device rather than always
    # falling back to the default "cuda" device.
    if "cuda" in str(device) and torch.cuda.is_available():
        self.device = torch.device(device)
    else:
        self.device = torch.device("cpu")

    # Assign seed
    if seed is not None:
        set_seeds(seed, self.env)

    # Create the tensorboard writer only when a log directory is given.
    self.writer = None
    if self.tensorboard_log is not None:  # pragma: no cover
        from torch.utils.tensorboard import SummaryWriter

        self.writer = SummaryWriter(log_dir=self.tensorboard_log)

    self.create_model()
def __init__(
    self,
    network_type,
    env,
    gamma=0.99,
    replay_size=1000000,
    batch_size=100,
    lr_p=0.001,
    lr_q=0.001,
    polyak=0.995,
    policy_frequency=2,
    epochs=100,
    start_steps=10000,
    steps_per_epoch=4000,
    noise=None,
    noise_std=0.1,
    pretrained=None,
    max_ep_len=1000,
    start_update=1000,
    update_interval=50,
    layers=(256, 256),
    tensorboard_log=None,
    seed=None,
    render=False,
    device="cpu",
    run_num=None,
    save_model=None,
    save_interval=5000,
):
    """Store the configuration, select a device, seed, and build the model.

    Every argument other than ``device`` is stored unchanged on the
    instance under its own name; this constructor does not interpret the
    hyperparameters any further. After the model is created, the result of
    ``self.get_hyperparams()`` is stored as ``self.checkpoint``.

    Args:
        device: Device name or ``torch.device``. The requested device is
            used only when its name contains ``"cuda"`` and
            ``torch.cuda.is_available()`` is true; otherwise the CPU
            device is selected.
        seed: When not ``None``, ``set_seeds(seed, env)`` is called.
        tensorboard_log: When not ``None``, a ``SummaryWriter`` is created
            with this value as ``log_dir`` and stored as ``self.writer``;
            otherwise ``self.writer`` is ``None``.
    """
    self.network_type = network_type
    self.env = env
    self.gamma = gamma
    self.replay_size = replay_size
    self.batch_size = batch_size
    self.lr_p = lr_p
    self.lr_q = lr_q
    self.polyak = polyak
    self.policy_frequency = policy_frequency
    self.epochs = epochs
    self.start_steps = start_steps
    self.steps_per_epoch = steps_per_epoch
    self.noise = noise
    self.noise_std = noise_std
    self.max_ep_len = max_ep_len
    self.start_update = start_update
    self.update_interval = update_interval
    self.save_interval = save_interval
    self.layers = layers
    self.tensorboard_log = tensorboard_log
    self.pretrained = pretrained
    self.seed = seed
    self.render = render
    self.run_num = run_num
    self.save_model = save_model

    # Helpers resolved from the surrounding scope, exposed as attributes.
    self.evaluate = evaluate
    self.save = save_params
    self.load = load_params

    # Assign device. ``str(device)`` lets a ``torch.device`` instance be
    # passed as well as a plain string (``in`` on a torch.device raises).
    if "cuda" in str(device) and torch.cuda.is_available():
        self.device = torch.device(device)
    else:
        self.device = torch.device("cpu")

    # Assign seed
    if seed is not None:
        set_seeds(seed, self.env)

    # Create the tensorboard writer only when a log directory is given.
    self.writer = None
    if self.tensorboard_log is not None:  # pragma: no cover
        from torch.utils.tensorboard import SummaryWriter

        self.writer = SummaryWriter(log_dir=self.tensorboard_log)

    self.create_model()
    # Snapshot taken after the model exists.
    self.checkpoint = self.get_hyperparams()
def __init__(
    self,
    network_type: str,
    env: Union[gym.Env, VecEnv],
    double_dqn: bool = False,
    dueling_dqn: bool = False,
    noisy_dqn: bool = False,
    categorical_dqn: bool = False,
    prioritized_replay: bool = False,
    epochs: int = 100,
    max_iterations_per_epoch: int = 100,
    max_ep_len: int = 1000,
    gamma: float = 0.99,
    lr: float = 0.001,
    batch_size: int = 32,
    replay_size: int = 100,
    prioritized_replay_alpha: float = 0.6,
    max_epsilon: float = 1.0,
    min_epsilon: float = 0.01,
    epsilon_decay: int = 1000,
    num_atoms: int = 51,
    vmin: int = -10,
    vmax: int = 10,
    seed: Optional[int] = None,
    render: bool = False,
    device: Union[torch.device, str] = "cpu",
):
    """Store the configuration, select a device, seed, and build the model.

    Arguments are stored unchanged on the instance under their own names,
    with these exceptions: ``epochs`` is stored as ``self.max_epochs``,
    ``vmin``/``vmax`` are stored as ``self.Vmin``/``self.Vmax``, ``seed``
    is only forwarded to ``set_seeds`` and ``device`` is resolved as
    described below. ``self.reward_hist`` starts as an empty list.

    Args:
        device: Device name or ``torch.device``. The requested device is
            used only when its name contains ``"cuda"`` and
            ``torch.cuda.is_available()`` is true; otherwise the CPU
            device is selected.
        seed: When not ``None``, ``set_seeds(seed, env)`` is called.
    """
    self.env = env
    self.double_dqn = double_dqn
    self.dueling_dqn = dueling_dqn
    self.noisy_dqn = noisy_dqn
    self.categorical_dqn = categorical_dqn
    self.prioritized_replay = prioritized_replay
    self.max_epochs = epochs
    self.max_iterations_per_epoch = max_iterations_per_epoch
    self.max_ep_len = max_ep_len
    self.replay_size = replay_size
    self.prioritized_replay_alpha = prioritized_replay_alpha
    self.lr = lr
    self.gamma = gamma
    self.batch_size = batch_size
    self.num_atoms = num_atoms
    self.Vmin = vmin
    self.Vmax = vmax
    self.render = render
    self.reward_hist = []
    self.max_epsilon = max_epsilon
    self.min_epsilon = min_epsilon
    self.epsilon_decay = epsilon_decay
    self.network_type = network_type

    # Assign device. The signature allows a ``torch.device``, which does
    # not support ``in``; compare against its string form instead.
    if "cuda" in str(device) and torch.cuda.is_available():
        self.device = torch.device(device)
    else:
        self.device = torch.device("cpu")

    # Assign seed
    if seed is not None:
        set_seeds(seed, self.env)

    # No writer is attached here; logs are reset before the model is built.
    self.writer = None
    self.empty_logs()
    self.create_model()