# Batch interval constant; its consumer is outside this block.
TEST_EVERY_BATCH = 100


if __name__ == "__main__":
    # ---- command-line interface -------------------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-n", "--name",
        required=True,
        help="Name of the run",
    )
    parser.add_argument(
        "--cuda",
        default=False,
        action="store_true",
        help="Enable CUDA",
    )
    parser.add_argument(
        "--em",
        required=True,
        help="Environment model file name",
    )
    parser.add_argument(
        "--seed",
        type=int,
        default=common.DEFAULT_SEED,
        help="Random seed to use, default=%d" % common.DEFAULT_SEED,
    )
    args = parser.parse_args()

    # ---- device and output directory --------------------------------------
    device_name = "cuda" if args.cuda else "cpu"
    device = torch.device(device_name)

    saves_path = os.path.join("saves", "03_i2a_" + args.name)
    os.makedirs(saves_path, exist_ok=True)

    # ---- environments -----------------------------------------------------
    envs = [common.make_env() for _ in range(common.NUM_ENVS)]
    test_env = common.make_env(test=True)

    # A falsy seed (0) skips seeding and leaves the run suffix empty.
    suffix = ""
    if args.seed:
        common.set_seed(args.seed, envs, cuda=args.cuda)
        suffix = "-seed=%d" % args.seed

    writer = SummaryWriter(comment="-03_i2a_" + args.name + suffix)

    # ---- networks ---------------------------------------------------------
    # Both networks are sized from the first environment's spaces.
    obs_shape = envs[0].observation_space.shape
    act_n = envs[0].action_space.n

    net_policy = common.AtariA2C(obs_shape, act_n).to(device)
    net_em = i2a.EnvironmentModel(obs_shape, act_n)
if __name__ == "__main__":
    # Script entry point: parse options, create the output directory and
    # environments, then build the A2C network and its optimizer.
    parser = argparse.ArgumentParser()
    # A store_true switch must default to False. With default=True the flag
    # could never be turned off, so CUDA was always selected and the script
    # failed on machines without a GPU.
    parser.add_argument("--cuda", default=False, action="store_true", help="Enable cuda")
    parser.add_argument("-n", "--name", required=True, help="Name of the run")
    parser.add_argument("--seed", type=int, default=common.DEFAULT_SEED,
                        help="Random seed to use, default=%d" % common.DEFAULT_SEED)
    # NOTE(review): --steps is only parsed here; presumably consumed by the
    # training loop further down -- confirm.
    parser.add_argument("--steps", type=int, default=None,
                        help="Limit of training steps, default=disabled")
    args = parser.parse_args()

    device = torch.device("cuda" if args.cuda else "cpu")
    print('device: ', device, )

    # Per-run output directory under ./saves.
    saves_path = os.path.join("saves", "01_a2c_" + args.name)
    os.makedirs(saves_path, exist_ok=True)

    envs = [common.make_env() for _ in range(common.NUM_ENVS)]
    # A falsy seed (0) skips seeding and leaves the run suffix empty.
    if args.seed:
        common.set_seed(args.seed, envs, cuda=args.cuda)
        suffix = "-seed=%d" % args.seed
    else:
        suffix = ""

    test_env = common.make_env(test=True)
    writer = SummaryWriter(comment="-01_a2c_" + args.name + suffix)

    # The network is sized from the first environment's observation/action spaces.
    net = common.AtariA2C(envs[0].observation_space.shape, envs[0].action_space.n).to(device)
    print(net)
    optimizer = optim.RMSprop(net.parameters(), lr=LEARNING_RATE, eps=1e-5)

    # Counters initialised for the code that follows this block.
    step_idx = 0
    total_steps = 0
# NOTE(review): several statements appear to have been collapsed onto this one
# physical line. The two leading assignments (`total_steps[e_idx] = 0`,
# `obs[e_idx] = o`) use names bound outside this view and presumably close a
# definition that starts earlier -- confirm against the full file.
#
# The `__main__` section that follows:
#   * parses --cuda, -n/--name (required) and -m/--model (required);
#   * creates the directory saves/02_env_<name>;
#   * builds NUM_ENVS environments and a SummaryWriter tagged "-02_env_<name>";
#   * builds an AtariA2C network, loads its weights from --model using a
#     map_location callable that returns the storage unchanged, and only then
#     moves the network to the selected device;
#   * builds an EnvironmentModel on the selected device and an Adam optimizer
#     over net_em's parameters only (not net's);
#   * iterates over iterate_batches(envs, net, device) inside a TBMeanTracker
#     context, taking the mean of done_rewards when that list is non-empty.
# The loop body continues past the end of this line.
total_steps[e_idx] = 0 obs[e_idx] = o if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--cuda", default=False, action="store_true", help="Enable cuda") parser.add_argument("-n", "--name", required=True, help="Name of the run") parser.add_argument("-m", "--model", required=True, help="File with model to load") args = parser.parse_args() device = torch.device("cuda" if args.cuda else "cpu") saves_path = os.path.join("saves", "02_env_" + args.name) os.makedirs(saves_path, exist_ok=True) envs = [common.make_env() for _ in range(NUM_ENVS)] writer = SummaryWriter(comment="-02_env_" + args.name) net = common.AtariA2C(envs[0].observation_space.shape, envs[0].action_space.n) net_em = i2a.EnvironmentModel(envs[0].observation_space.shape, envs[0].action_space.n).to(device) net.load_state_dict(torch.load(args.model, map_location=lambda storage, loc: storage)) net = net.to(device) print(net_em) optimizer = optim.Adam(net_em.parameters(), lr=LEARNING_RATE) step_idx = 0 best_loss = np.inf with ptan.common.utils.TBMeanTracker(writer, batch_size=100) as tb_tracker: for mb_obs, mb_obs_next, mb_actions, mb_rewards, done_rewards, done_steps in iterate_batches(envs, net, device): if len(done_rewards) > 0: m_reward = np.mean(done_rewards)