import argparse

import torch
import wandb

from dqn.buffer import Buffer
from common.load_cfg import load_cfg
from env import make_vec_envs
from dqn import actor_iter, DQN
from predictor import Predictor

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("--size", type=int, default=3)
    parser.add_argument("--add_ri", action="store_true")
    parser.add_argument("--random", action="store_true")
    p = parser.parse_args()

    # load the default config and override it with the CLI arguments
    cfg = load_cfg("default")
    cfg.update(vars(p))
    cfg["env"] = "pol"
    wandb.init(project="lwm", config=cfg)

    # a single environment instance for the grid-world ("pol") task
    num_env = 1
    envs = make_vec_envs(
        num=num_env,
        size=cfg["size"],
        max_ep_len=cfg["train"]["max_ep_len"],
    )

    # replay buffer; total capacity is split evenly across environment workers
    buffer = Buffer(
        num_env=num_env,
        maxlen=int(cfg["buffer"]["size"] / num_env),
        obs_shape=(4,),
        device=cfg["buffer"]["device"],
    )
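# For reference, a minimal sketch of the replay-buffer interface the script above
# relies on. The constructor arguments mirror the Buffer(...) call; the push/sample
# methods and their signatures are assumptions for illustration only, not the
# actual dqn.buffer.Buffer implementation.
import torch


class SketchBuffer:
    def __init__(self, num_env, maxlen, obs_shape, device):
        self.num_env, self.maxlen = num_env, maxlen
        self.obs = torch.zeros(maxlen, num_env, *obs_shape, device=device)
        self.action = torch.zeros(maxlen, num_env, dtype=torch.long, device=device)
        self.reward = torch.zeros(maxlen, num_env, device=device)
        self.done = torch.zeros(maxlen, num_env, dtype=torch.bool, device=device)
        self.cursor, self.full = 0, False

    def push(self, obs, action, reward, done):
        # write one transition per environment at the current cursor position
        self.obs[self.cursor] = obs
        self.action[self.cursor] = action
        self.reward[self.cursor] = reward
        self.done[self.cursor] = done
        self.cursor = (self.cursor + 1) % self.maxlen
        self.full = self.full or self.cursor == 0

    def sample(self, batch_size):
        # uniform sampling over filled time steps and environment indices
        high = self.maxlen if self.full else self.cursor
        t = torch.randint(high, (batch_size,))
        e = torch.randint(self.num_env, (batch_size,))
        return self.obs[t, e], self.action[t, e], self.reward[t, e], self.done[t, e]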
import argparse

import wandb

from dqn.buffer import Buffer
from common.load_cfg import load_cfg
from atari import make_vec_envs
from dqn import actor_iter, Learner, DQN
from repre.w_mse import WMSE
from repre.predictor import Predictor

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("--cfg", type=str, default="default")
    parser.add_argument("--env", type=str, default="MontezumaRevenge")
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--ri_scale", type=float, default=1)
    p = parser.parse_args()

    # load the named config and override it with the CLI arguments
    cfg = load_cfg(p.cfg)
    cfg.update(vars(p))
    wandb.init(project="lwm", config=cfg)

    # vectorized Atari environments, one per actor
    num_env = cfg["agent"]["actors"]
    fstack = cfg["agent"]["frame_stack"]
    envs = make_vec_envs(cfg["env"], num_env, cfg["seed"], cfg["train"]["max_ep_len"])

    # replay buffer; total capacity is split evenly across environment workers
    buffer = Buffer(
        num_env=num_env,
        maxlen=int(cfg["buffer"]["size"] / num_env),
        obs_shape=envs.observation_space.shape,
        device=cfg["buffer"]["device"],
    )

    # Q-network and the W-MSE representation learner built on top of the buffer
    model = DQN(envs.action_space.n, fstack).cuda().train()
    wmse = WMSE(buffer, cfg)
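# For reference, the nested structure load_cfg is expected to return, inferred
# from the keys accessed in the two scripts above. The concrete values below are
# placeholders for illustration, not the repository's actual defaults.
example_cfg = {
    "train": {"max_ep_len": 4500},     # episode length cap passed to make_vec_envs
    "buffer": {
        "size": 100_000,               # total replay capacity, split across actors
        "device": "cpu",               # where replay tensors are stored
    },
    "agent": {
        "actors": 8,                   # number of parallel environments (Atari script)
        "frame_stack": 4,              # frames stacked per observation (Atari script)
    },
}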