# NOTE(review): this chunk begins mid-function — the statements down to the
# second savefig are the tail of a plotting helper whose `def` line is outside
# this view (it references `fname`, `x`, `y` that are not defined here).
plt.savefig(fname)
# Label every 10th point with its index so individual frames/steps can be
# located on the scatter plot.
for i in range(0, len(x), 10):
    plt.text(
        x[i],
        y[i],
        str(i),
        horizontalalignment="center",
        verticalalignment="center",
        fontsize=14,
    )
# Save a second, index-annotated copy alongside the plain figure.
plt.savefig("num_" + fname)


if __name__ == "__main__":
    # Environment name from the first CLI argument, defaulting to
    # MontezumaRevenge; a single vectorized environment instance.
    name = "MontezumaRevenge" if len(sys.argv) < 2 else sys.argv[1]
    env = make_vec_envs(name, 1)

    # One CNN encoder (1 input channel, 32-dim embedding) per
    # representation-learning method being compared.
    conv_wmse = mnih_cnn(1, 32)
    conv_idf = mnih_cnn(1, 32)
    conv_cpc = mnih_cnn(1, 32)
    conv_rnd = mnih_cnn(1, 32)

    # Load pretrained weights for WMSE / IDF / CPC onto CPU. conv_rnd is
    # intentionally left at its random initialization — presumably a
    # random-network (RND-style) baseline; confirm against the training code.
    conv_wmse.load_state_dict(
        torch.load("models/conv_wmse.pt", map_location="cpu"))
    conv_idf.load_state_dict(
        torch.load("models/conv_idf.pt", map_location="cpu"))
    conv_cpc.load_state_dict(
        torch.load("models/conv_cpc.pt", map_location="cpu"))
    # Switch all four encoders to eval mode (tuple expression used purely for
    # its side effects).
    conv_wmse.eval(), conv_idf.eval(), conv_cpc.eval(), conv_rnd.eval()

    # Storage for 1000 32-dim embeddings from each of the 4 encoders;
    # `cursor` tracks the write position (the filling loop is past this view).
    mem = torch.empty(4, 1000, 32)
    cursor = 0
from repre.inverse_dynamics import IDF
from repre.cpc import CPC

if __name__ == "__main__":
    # CLI: config preset name and Atari environment id.
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("--cfg", type=str, default="default")
    parser.add_argument("--env", type=str, default="MontezumaRevenge")
    p = parser.parse_args()

    # Load the named config and overlay the CLI arguments on top of it,
    # then register the merged config with wandb.
    cfg = load_cfg(p.cfg)
    cfg.update(vars(p))
    wandb.init(project="lwm", config=cfg)

    num_env = cfg["agent"]["actors"]
    fstack = cfg["agent"]["frame_stack"]
    envs = make_vec_envs(cfg["env"], num_env,
                         max_ep_len=cfg["train"]["max_ep_len"])
    num_action = envs.action_space.n

    # Replay buffer; the configured total size is split evenly across the
    # parallel environments.
    buffer = Buffer(
        num_env=num_env,
        maxlen=int(cfg["buffer"]["size"] / num_env),
        obs_shape=envs.observation_space.shape,
        device=cfg["buffer"]["device"],
    )

    # Three representation-learning objectives trained over the same buffer.
    wmse = WMSE(buffer, cfg)
    idf = IDF(buffer=buffer, num_action=num_action)
    cpc = CPC(buffer=buffer, num_action=num_action)

    # No model / predictor is passed (None, None); eps=1 suggests the actor
    # acts fully at random to warm up the buffer — confirm in actor_iter.
    actor = actor_iter(envs, None, None, cfg["buffer"]["warmup"], eps=1)
    # Warmup steps per environment; the training loop that consumes this is
    # past the end of this view.
    pretrain = int(cfg["buffer"]["warmup"] / num_env)
if __name__ == "__main__":
    # CLI: config preset, environment id, RNG seed, and a scale factor for
    # the intrinsic reward (ri_scale).
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("--cfg", type=str, default="default")
    parser.add_argument("--env", type=str, default="MontezumaRevenge")
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--ri_scale", type=float, default=1)
    p = parser.parse_args()

    # Load the named config, overlay the CLI arguments, log to wandb.
    cfg = load_cfg(p.cfg)
    cfg.update(vars(p))
    wandb.init(project="lwm", config=cfg)

    num_env = cfg["agent"]["actors"]
    fstack = cfg["agent"]["frame_stack"]
    envs = make_vec_envs(cfg["env"], num_env, cfg["seed"],
                         cfg["train"]["max_ep_len"])

    # Replay buffer; the configured total size is split evenly across the
    # parallel environments.
    buffer = Buffer(
        num_env=num_env,
        maxlen=int(cfg["buffer"]["size"] / num_env),
        obs_shape=envs.observation_space.shape,
        device=cfg["buffer"]["device"],
    )

    # Q-network on GPU in training mode; the world-model encoder (WMSE)
    # feeds the Predictor, which the Learner uses alongside the DQN.
    model = DQN(envs.action_space.n, fstack).cuda().train()
    wmse = WMSE(buffer, cfg)
    pred = Predictor(buffer, wmse.encoder, envs.action_space.n, cfg)
    learner = Learner(model, buffer, pred, cfg)

    # Acting iterator with configured warmup and exploration epsilon
    # (.get("eps") may yield None if unset — presumably handled inside
    # actor_iter; verify). The training loop is past the end of this view.
    actor = actor_iter(
        envs, model, pred, cfg["buffer"]["warmup"], eps=cfg["agent"].get("eps")
    )
from repre.predictor import Predictor

if __name__ == "__main__":
    # CLI mirrors the training script (seed / ri_scale accepted for config
    # parity even though this script evaluates a saved agent).
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("--cfg", type=str, default="default")
    parser.add_argument("--env", type=str, default="MontezumaRevenge")
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--ri_scale", type=float, default=1)
    p = parser.parse_args()

    # Load the named config, overlay the CLI arguments, log to wandb.
    cfg = load_cfg(p.cfg)
    cfg.update(vars(p))
    wandb.init(project="lwm", config=cfg)

    num_env = cfg["agent"]["actors"]
    fstack = cfg["agent"]["frame_stack"]
    envs = make_vec_envs(cfg["env"], num_env, cfg["seed"])

    # Buffer is still constructed because WMSE / Predictor take it as a
    # dependency, even in evaluation.
    buffer = Buffer(
        num_env=num_env,
        maxlen=int(cfg["buffer"]["size"] / num_env),
        obs_shape=envs.observation_space.shape,
        device=cfg["buffer"]["device"],
    )

    model = DQN(envs.action_space.n, fstack).cuda().train()
    wmse = WMSE(buffer, cfg)
    pred = Predictor(buffer, wmse.encoder, envs.action_space.n, cfg)

    # No warmup (0) and a near-greedy epsilon of 0.001 for evaluation rollouts.
    actor = actor_iter(envs, model, pred, 0, eps=0.001)

    # Restore pretrained encoder/predictor weights, then the DQN checkpoint
    # onto GPU. The evaluation loop is past the end of this view.
    wmse.load(), pred.load()
    cp = torch.load("models/dqn.pt", map_location="cuda")
    model.load_state_dict(cp)