Ejemplo n.º 1
0
    def __init__(
        self,
        network_type,
        env,
        timesteps_per_actorbatch=1000,
        gamma=0.99,
        actor_batch_size=4,
        epochs=1000,
        lr_policy=0.01,
        lr_value=0.0005,
        policy_copy_interval=20,
        pretrained=None,
        layers=(32, 32),
        tensorboard_log=None,
        seed=None,
        render=False,
        device="cpu",
        run_num=None,
        save_model=None,
        save_interval=50,
    ):
        self.network_type = network_type
        self.env = env
        self.timesteps_per_actorbatch = timesteps_per_actorbatch
        self.gamma = gamma
        self.actor_batch_size = actor_batch_size
        self.epochs = epochs
        self.lr_policy = lr_policy
        self.lr_value = lr_value
        self.tensorboard_log = tensorboard_log
        self.seed = seed
        self.render = render
        self.policy_copy_interval = policy_copy_interval
        self.evaluate = evaluate
        self.save_interval = save_interval
        self.pretrained = pretrained
        self.layers = layers
        self.run_num = run_num
        self.save_model = save_model
        self.save = save_params
        self.load = load_params

        # Assign device
        if "cuda" in device and torch.cuda.is_available():
            self.device = torch.device(device)
        else:
            self.device = torch.device("cpu")

        # Assign seed
        if seed is not None:
            set_seeds(seed, self.env)

        # init writer if tensorboard
        self.writer = None
        if self.tensorboard_log is not None:  # pragma: no cover
            from torch.utils.tensorboard import SummaryWriter

            self.writer = SummaryWriter(log_dir=self.tensorboard_log)

        self.create_model()
Ejemplo n.º 2
0
    def __init__(
        self,
        network_type: str,
        env: Union[gym.Env, VecEnv],
        gamma: float = 0.99,
        replay_size: int = 1000000,
        batch_size: int = 100,
        lr_p: float = 0.0001,
        lr_q: float = 0.001,
        polyak: float = 0.995,
        epochs: int = 100,
        start_steps: int = 10000,
        steps_per_epoch: int = 4000,
        noise: Optional[Any] = None,
        noise_std: float = 0.1,
        max_ep_len: int = 1000,
        start_update: int = 1000,
        update_interval: int = 50,
        layers: Tuple = (32, 32),
        seed: Optional[int] = None,
        render: bool = False,
        device: Union[torch.device, str] = "cpu",
    ):

        self.network_type = network_type
        self.env = env
        self.gamma = gamma
        self.replay_size = replay_size
        self.batch_size = batch_size
        self.lr_p = lr_p
        self.lr_q = lr_q
        self.polyak = polyak
        self.epochs = epochs
        self.start_steps = start_steps
        self.steps_per_epoch = steps_per_epoch
        self.noise = noise
        self.noise_std = noise_std
        self.max_ep_len = max_ep_len
        self.start_update = start_update
        self.update_interval = update_interval
        self.layers = layers
        self.seed = seed
        self.render = render

        # Assign device
        if "cuda" in device and torch.cuda.is_available():
            self.device = torch.device(device)
        else:
            self.device = torch.device("cpu")

        # Assign seed
        if seed is not None:
            set_seeds(seed, self.env)

        # Setup tensorboard writer
        self.writer = None

        self.empty_logs()
        self.create_model()
Ejemplo n.º 3
0
    def __init__(
        self,
        network_type: str,
        env: Union[gym.Env, VecEnv],
        gamma: float = 0.99,
        replay_size: int = 1000000,
        batch_size: int = 256,
        lr: float = 3e-4,
        alpha: float = 0.01,
        polyak: float = 0.995,
        entropy_tuning: bool = True,
        epochs: int = 1000,
        start_steps: int = 0,
        steps_per_epoch: int = 1000,
        max_ep_len: int = 1000,
        start_update: int = 256,
        update_interval: int = 1,
        layers: Tuple = (256, 256),
        seed: Optional[int] = None,
        render: bool = False,
        device: Union[torch.device, str] = "cpu",
    ):

        self.network_type = network_type
        self.env = env
        self.gamma = gamma
        self.replay_size = replay_size
        self.batch_size = batch_size
        self.lr = lr
        self.alpha = alpha
        self.polyak = polyak
        self.entropy_tuning = entropy_tuning
        self.epochs = epochs
        self.start_steps = start_steps
        self.steps_per_epoch = steps_per_epoch
        self.max_ep_len = max_ep_len
        self.start_update = start_update
        self.update_interval = update_interval
        self.layers = layers
        self.seed = seed
        self.render = render

        # Assign device
        if "cuda" in device and torch.cuda.is_available():
            self.device = torch.device(device)
        else:
            self.device = torch.device("cpu")

        # Assign seed
        if seed is not None:
            set_seeds(seed, self.env)

        # Setup tensorboard writer
        self.writer = None

        self.empty_logs()
        self.create_model()
Ejemplo n.º 4
0
 def __init__(
     self,
     agent,
     env,
     logger,
     buffer=None,
     off_policy=False,
     save_interval=0,
     render=False,
     max_ep_len=1000,
     distributed=False,
     ckpt_log_name="experiment",
     steps_per_epoch=4000,
     epochs=10,
     device="cpu",
     log_interval=10,
     batch_size=50,
     seed=None,
     deterministic_actions=False,
     transform=None,
     history_length=4,
 ):
     self.agent = agent
     self.env = env
     self.logger = logger
     self.off_policy = off_policy
     if self.off_policy and buffer is None:
         if self.agent.replay_buffer is None:
             raise Exception("Off Policy Training requires a Replay Buffer")
         else:
             self.buffer = self.agent.replay_buffer
     self.save_interval = save_interval
     self.render = render
     self.max_ep_len = max_ep_len
     self.ckpt_log_name = ckpt_log_name
     self.steps_per_epoch = steps_per_epoch
     self.epochs = epochs
     self.device = device
     self.log_interval = log_interval
     self.batch_size = batch_size
     self.deterministic_actions = deterministic_actions
     self.transform = transform
     self.history_length = history_length
     if seed is not None:
         set_seeds(seed, self.env)
Ejemplo n.º 5
0
    def __init__(
        self,
        network_type,
        env,
        gamma=0.99,
        replay_size=1000000,
        batch_size=256,
        lr=3e-4,
        alpha=0.01,
        polyak=0.995,
        entropy_tuning=True,
        epochs=1000,
        start_steps=0,
        steps_per_epoch=1000,
        max_ep_len=1000,
        start_update=256,
        update_interval=1,
        layers=(256, 256),
        pretrained=None,
        tensorboard_log=None,
        seed=None,
        render=False,
        device="cpu",
        run_num=None,
        save_model=None,
        save_interval=5000,
    ):

        self.network_type = network_type
        self.env = env
        self.gamma = gamma
        self.replay_size = replay_size
        self.batch_size = batch_size
        self.lr = lr
        self.alpha = alpha
        self.polyak = polyak
        self.entropy_tuning = entropy_tuning
        self.epochs = epochs
        self.start_steps = start_steps
        self.steps_per_epoch = steps_per_epoch
        self.max_ep_len = max_ep_len
        self.start_update = start_update
        self.update_interval = update_interval
        self.save_interval = save_interval
        self.layers = layers
        self.pretrained = pretrained
        self.tensorboard_log = tensorboard_log
        self.seed = seed
        self.render = render
        self.run_num = run_num
        self.save_model = save_model
        self.save = save_params
        self.load = load_params
        self.evaluate = evaluate

        # Assign device
        if "cuda" in device and torch.cuda.is_available():
            self.device = torch.device(device)
        else:
            self.device = torch.device("cpu")

        # Assign seed
        if seed is not None:
            set_seeds(seed, self.env)

        # Setup tensorboard writer
        self.writer = None
        if self.tensorboard_log is not None:  # pragma: no cover
            from torch.utils.tensorboard import SummaryWriter

            self.writer = SummaryWriter(log_dir=self.tensorboard_log)

        self.create_model()
Ejemplo n.º 6
0
    def __init__(
        self,
        network_type,
        env,
        double_dqn=False,
        dueling_dqn=False,
        noisy_dqn=False,
        categorical_dqn=False,
        prioritized_replay=False,
        epochs=100,
        max_iterations_per_epoch=100,
        max_ep_len=1000,
        gamma=0.99,
        lr=0.001,
        batch_size=32,
        replay_size=100,
        prioritized_replay_alpha=0.6,
        max_epsilon=1.0,
        min_epsilon=0.01,
        epsilon_decay=1000,
        num_atoms=51,
        Vmin=-10,
        Vmax=10,
        tensorboard_log=None,
        seed=None,
        render=False,
        device="cpu",
        save_interval=5000,
        pretrained=None,
        run_num=None,
        save_model=None,
        transform=None,
    ):
        self.env = env
        self.double_dqn = double_dqn
        self.dueling_dqn = dueling_dqn
        self.noisy_dqn = noisy_dqn
        self.categorical_dqn = categorical_dqn
        self.prioritized_replay = prioritized_replay
        self.max_epochs = epochs
        self.max_iterations_per_epoch = max_iterations_per_epoch
        self.max_ep_len = max_ep_len
        self.replay_size = replay_size
        self.prioritized_replay_alpha = prioritized_replay_alpha
        self.lr = lr
        self.gamma = gamma
        self.batch_size = batch_size
        self.num_atoms = num_atoms
        self.Vmin = Vmin
        self.Vmax = Vmax
        self.tensorboard_log = tensorboard_log
        self.render = render
        self.loss_hist = []
        self.reward_hist = []
        self.max_epsilon = max_epsilon
        self.min_epsilon = min_epsilon
        self.epsilon_decay = epsilon_decay
        self.evaluate = evaluate
        self.run_num = run_num
        self.save_model = save_model
        self.save_interval = save_interval
        self.save = save_params
        self.load = load_params
        self.pretrained = pretrained
        self.network_type = network_type
        self.history_length = None
        self.transform = transform

        # Assign device
        if "cuda" in device and torch.cuda.is_available():
            self.device = torch.device(device)
        else:
            self.device = torch.device("cpu")

        # Assign seed
        if seed is not None:
            set_seeds(seed, self.env)

        # Setup tensorboard writer
        self.writer = None
        if self.tensorboard_log is not None:  # pragma: no cover
            from torch.utils.tensorboard import SummaryWriter

            self.writer = SummaryWriter(log_dir=self.tensorboard_log)

        self.create_model()
Ejemplo n.º 7
0
    def __init__(
        self,
        network_type: str,
        env: Union[gym.Env, venv],
        gamma: float = 0.99,
        actor_batch_size: int = 64,
        lr_actor: float = 0.01,
        lr_critic: float = 0.1,
        num_episodes: int = 100,
        timesteps_per_actorbatch: int = 4000,
        max_ep_len: int = 1000,
        layers: Tuple = (32, 32),
        noise: Any = None,
        noise_std: float = 0.1,
        tensorboard_log: str = None,
        seed: Optional[int] = None,
        render: bool = False,
        device: Union[torch.device, str] = "cpu",
        run_num: int = None,
        save_model: str = None,
        save_interval: int = 1000,
    ):
        self.network_type = network_type
        self.env = env
        self.gamma = gamma
        self.actor_batch_size = actor_batch_size
        self.lr_actor = lr_actor
        self.lr_critic = lr_critic
        self.num_episodes = num_episodes
        self.timesteps_per_actorbatch = timesteps_per_actorbatch
        self.max_ep_len = max_ep_len
        self.layers = layers
        self.noise = noise
        self.noise_std = noise_std
        self.tensorboard_log = tensorboard_log
        self.seed = seed
        self.render = render
        self.run_num = run_num
        self.save_interval = save_interval
        self.save_model = None
        self.save = save_params
        self.load = load_params

        # Assign device
        if "cuda" in device and torch.cuda.is_available():
            self.device = torch.device("cuda")
        else:
            self.device = torch.device("cpu")

        # Assign seed
        if seed is not None:
            set_seeds(seed, self.env)

        # Setup tensorboard writer
        self.writer = None
        if self.tensorboard_log is not None:  # pragma: no cover
            from torch.utils.tensorboard import SummaryWriter

            self.writer = SummaryWriter(log_dir=self.tensorboard_log)

        self.create_model()
Ejemplo n.º 8
0
    def __init__(
        self,
        network_type,
        env,
        gamma=0.99,
        replay_size=1000000,
        batch_size=100,
        lr_p=0.001,
        lr_q=0.001,
        polyak=0.995,
        policy_frequency=2,
        epochs=100,
        start_steps=10000,
        steps_per_epoch=4000,
        noise=None,
        noise_std=0.1,
        pretrained=None,
        max_ep_len=1000,
        start_update=1000,
        update_interval=50,
        layers=(256, 256),
        tensorboard_log=None,
        seed=None,
        render=False,
        device="cpu",
        run_num=None,
        save_model=None,
        save_interval=5000,
    ):

        self.network_type = network_type
        self.env = env
        self.gamma = gamma
        self.replay_size = replay_size
        self.batch_size = batch_size
        self.lr_p = lr_p
        self.lr_q = lr_q
        self.polyak = polyak
        self.policy_frequency = policy_frequency
        self.epochs = epochs
        self.start_steps = start_steps
        self.steps_per_epoch = steps_per_epoch
        self.noise = noise
        self.noise_std = noise_std
        self.max_ep_len = max_ep_len
        self.start_update = start_update
        self.update_interval = update_interval
        self.save_interval = save_interval
        self.layers = layers
        self.tensorboard_log = tensorboard_log
        self.pretrained = pretrained
        self.seed = seed
        self.render = render
        self.evaluate = evaluate
        self.run_num = run_num
        self.save_model = save_model
        self.save = save_params
        self.load = load_params

        # Assign device
        if "cuda" in device and torch.cuda.is_available():
            self.device = torch.device(device)
        else:
            self.device = torch.device("cpu")

        # Assign seed
        if seed is not None:
            set_seeds(seed, self.env)

        # Setup tensorboard writer
        self.writer = None
        if self.tensorboard_log is not None:  # pragma: no cover
            from torch.utils.tensorboard import SummaryWriter

            self.writer = SummaryWriter(log_dir=self.tensorboard_log)

        self.create_model()
        self.checkpoint = self.get_hyperparams()
Ejemplo n.º 9
0
    def __init__(
        self,
        network_type: str,
        env: Union[gym.Env, VecEnv],
        double_dqn: bool = False,
        dueling_dqn: bool = False,
        noisy_dqn: bool = False,
        categorical_dqn: bool = False,
        prioritized_replay: bool = False,
        epochs: int = 100,
        max_iterations_per_epoch: int = 100,
        max_ep_len: int = 1000,
        gamma: float = 0.99,
        lr: float = 0.001,
        batch_size: int = 32,
        replay_size: int = 100,
        prioritized_replay_alpha: float = 0.6,
        max_epsilon: float = 1.0,
        min_epsilon: float = 0.01,
        epsilon_decay: int = 1000,
        num_atoms: int = 51,
        vmin: int = -10,
        vmax: int = 10,
        seed: Optional[int] = None,
        render: bool = False,
        device: Union[torch.device, str] = "cpu",
    ):
        self.env = env
        self.double_dqn = double_dqn
        self.dueling_dqn = dueling_dqn
        self.noisy_dqn = noisy_dqn
        self.categorical_dqn = categorical_dqn
        self.prioritized_replay = prioritized_replay
        self.max_epochs = epochs
        self.max_iterations_per_epoch = max_iterations_per_epoch
        self.max_ep_len = max_ep_len
        self.replay_size = replay_size
        self.prioritized_replay_alpha = prioritized_replay_alpha
        self.lr = lr
        self.gamma = gamma
        self.batch_size = batch_size
        self.num_atoms = num_atoms
        self.Vmin = vmin
        self.Vmax = vmax
        self.render = render
        self.reward_hist = []
        self.max_epsilon = max_epsilon
        self.min_epsilon = min_epsilon
        self.epsilon_decay = epsilon_decay
        self.network_type = network_type

        # Assign device
        if "cuda" in device and torch.cuda.is_available():
            self.device = torch.device(device)
        else:
            self.device = torch.device("cpu")

        # Assign seed
        if seed is not None:
            set_seeds(seed, self.env)

        # Setup tensorboard writer
        self.writer = None

        self.empty_logs()
        self.create_model()