def __init__(
    self,
    network: Any,
    env: Any,
    create_model: bool = True,
    batch_size: int = 64,
    gamma: float = 0.99,
    policy_layers: Tuple = (64, 64),
    value_layers: Tuple = (64, 64),
    lr_policy: float = 0.0001,
    lr_value: float = 0.001,
    **kwargs
):
    """Store the constructor arguments, pick a torch device and optionally seed.

    Every positional/keyword parameter is stored unchanged on ``self`` under
    the same name.

    Recognised entries of ``**kwargs``:
        seed: stored as ``self.seed`` (``None`` when absent). When it is not
            ``None``, ``set_seeds(self.seed, self.env)`` is called last.
        render: stored as ``self.render`` (``False`` when absent).
        device: a device string or ``torch.device`` (``"cpu"`` when absent).
            It is honoured only if its string form contains ``"cuda"`` and
            CUDA is available; in every other case ``self.device`` is the
            CPU device.
    """
    self.network = network
    self.env = env
    self.create_model = create_model
    self.batch_size = batch_size
    self.gamma = gamma
    self.policy_layers = policy_layers
    self.value_layers = value_layers
    self.lr_policy = lr_policy
    self.lr_value = lr_value

    self.seed = kwargs.get("seed")
    self.render = kwargs.get("render", False)

    # Assign device. The membership test runs on str(...) so that a
    # torch.device instance is accepted as well as a plain string
    # (``"cuda" in torch.device(...)`` would raise TypeError).
    requested_device = kwargs.get("device", "cpu")
    if "cuda" in str(requested_device) and torch.cuda.is_available():
        self.device = torch.device(requested_device)
    else:
        # Fall back to CPU when CUDA was not requested or is unavailable.
        self.device = torch.device("cpu")

    # Assign seed
    if self.seed is not None:
        set_seeds(self.seed, self.env)
def __init__(
    self,
    agent: Any,
    env: Union[gym.Env, VecEnv],
    log_mode: Optional[List[str]] = None,
    log_key: str = "timestep",
    log_interval: int = 10,
    logdir: str = "logs",
    epochs: int = 50,
    max_timesteps: Optional[int] = None,
    off_policy: bool = False,
    save_interval: int = 0,
    save_model: str = "checkpoints",
    run_num: Optional[int] = None,
    load_weights: Optional[str] = None,
    load_hyperparams: Optional[str] = None,
    render: bool = False,
    evaluate_episodes: int = 25,
    seed: Optional[int] = None,
):
    """Store the run configuration, optionally seed, and create the logger.

    All parameters except ``seed`` are stored on ``self`` under the same
    name. ``seed`` is not stored; when it is not ``None`` it is passed to
    ``set_seeds(seed, self.env)``.

    Args:
        log_mode: Names passed to ``Logger`` as ``formats``. ``None`` (the
            default) means ``["stdout"]``. A copy is stored on
            ``self.log_mode`` so that mutating the attribute never affects
            the caller's list or other instances.

    Side effects:
        Creates ``self.logger = Logger(logdir=logdir, formats=...)``.
    """
    # A list literal as the default would be one shared object across all
    # calls; build a fresh list for every instance instead.
    log_mode = ["stdout"] if log_mode is None else list(log_mode)

    self.agent = agent
    self.env = env
    self.log_mode = log_mode
    self.log_key = log_key
    self.log_interval = log_interval
    self.logdir = logdir
    self.epochs = epochs
    self.max_timesteps = max_timesteps
    self.off_policy = off_policy
    self.save_interval = save_interval
    self.save_model = save_model
    self.run_num = run_num
    self.load_weights = load_weights
    self.load_hyperparams = load_hyperparams
    self.render = render
    self.evaluate_episodes = evaluate_episodes

    if seed is not None:
        set_seeds(seed, self.env)

    # The logger receives its own copy of the format list.
    self.logger = Logger(logdir=logdir, formats=[*log_mode])
def test_set_seeds(self):
    """After ``set_seeds(42)``, one draw from 0..19 via ``random.sample`` must be 3."""
    set_seeds(42)

    population = list(range(20))
    drawn = random.sample(population, 1)
    sampled = drawn[0]

    assert sampled == 3