# --- I2A experiment setup (config-driven variant) ---
# Builds the experiment configuration, the training/test environments, the
# model-free policy net and the environment model. Relies on project-local
# modules (experiment_config, common, models) not visible in this file chunk.
config = experiment_config.ExperimentCfg()
config.make_i2a_config(parser)  # populates config from CLI args; `parser` is defined elsewhere in this file
device = torch.device(config.DEVICE)
writer = SummaryWriter(comment="_i2a_fc_" + config.build_name_for_i2a_writer())
# NOTE(review): `logdir` is the tensorboardX attribute; torch.utils.tensorboard
# exposes `log_dir` instead — confirm which SummaryWriter is imported.
saves_path = writer.logdir
# One environment per parallel actor, plus a separate env for evaluation.
envs = [ common.makeCustomizedGridEnv(config) for _ in range(config.NUM_ENVS) ]
test_env = common.makeCustomizedGridEnv(config)
# Sets the seed on torch operations and on all environments (training and test).
common.set_seed(config.SEED, envs=envs)
common.set_seed(config.SEED, envs=[test_env])
obs_shape = envs[0].observation_space.shape
act_n = envs[0].action_space.n
# net_policy = common.AtariA2C(obs_shape, act_n).to(device)
net_policy = common.getNet(config)
config.A2CNET = str(net_policy)  # record the net architecture in the config for logging
net_em = models.environment_model.EnvironmentModel(obs_shape, act_n, config)
# net_em.load_state_dict(torch.load(config.EM_FILE_NAME, map_location=lambda storage, loc: storage))
net_em = net_em.to(device)
config.EM_NET = str(net_em)  # record the EM architecture in the config for logging
# --- I2A training setup (argparse-driven variant) ---
# Parses CLI arguments, creates environments, loads a pre-trained environment
# model from disk and assembles the I2A agent around it.
parser = argparse.ArgumentParser()
parser.add_argument("-n", "--name", required=True, help="Name of the run")
parser.add_argument("--cuda", default=False, action="store_true", help="Enable CUDA")
parser.add_argument("--em", required=True, help="Environment model file name")
parser.add_argument("--seed", type=int, default=common.DEFAULT_SEED,
                    help="Random seed to use, default=%d" % common.DEFAULT_SEED)
args = parser.parse_args()
device = torch.device("cuda" if args.cuda else "cpu")
# Checkpoints for this run go under saves/03_i2a_<name>/.
saves_path = os.path.join("saves", "03_i2a_" + args.name)
os.makedirs(saves_path, exist_ok=True)
# One environment per parallel actor, plus a separate env for evaluation.
envs = [common.make_env() for _ in range(common.NUM_ENVS)]
test_env = common.make_env(test=True)
# NOTE(review): `if args.seed` treats seed 0 as "no seeding" — presumably
# intentional, but verify if seed 0 should be a valid choice.
if args.seed:
    common.set_seed(args.seed, envs, cuda=args.cuda)
    suffix = "-seed=%d" % args.seed
else:
    suffix = ""
writer = SummaryWriter(comment="-03_i2a_" + args.name + suffix)
obs_shape = envs[0].observation_space.shape
act_n = envs[0].action_space.n
net_policy = common.AtariA2C(obs_shape, act_n).to(device)
# Environment model is pre-trained: weights are loaded from the --em file
# (map_location keeps tensors on CPU until the explicit .to(device) below).
net_em = i2a.EnvironmentModel(obs_shape, act_n)
net_em.load_state_dict(torch.load(args.em, map_location=lambda storage, loc: storage))
net_em = net_em.to(device)
# ROLLOUTS_STEPS is defined elsewhere in this file.
net_i2a = i2a.I2A(obs_shape, act_n, net_em, net_policy, ROLLOUTS_STEPS).to(device)
# --- A2C training entry point ---
# Parses CLI arguments, builds the vectorized environments and the A2C net,
# and initializes the training-loop bookkeeping variables.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # FIX: was `default=True` with action="store_true", which made the flag a
    # no-op (args.cuda was always True and CUDA could never be disabled),
    # contradicting the help text. Now matches the sibling scripts in this
    # file: CPU by default, pass --cuda to enable GPU.
    parser.add_argument("--cuda", default=False, action="store_true", help="Enable cuda")
    parser.add_argument("-n", "--name", required=True, help="Name of the run")
    parser.add_argument("--seed", type=int, default=common.DEFAULT_SEED,
                        help="Random seed to use, default=%d" % common.DEFAULT_SEED)
    parser.add_argument("--steps", type=int, default=None,
                        help="Limit of training steps, default=disabled")
    args = parser.parse_args()
    device = torch.device("cuda" if args.cuda else "cpu")
    print('device: ', device)
    # Checkpoints for this run go under saves/01_a2c_<name>/.
    saves_path = os.path.join("saves", "01_a2c_" + args.name)
    os.makedirs(saves_path, exist_ok=True)
    # One environment per parallel actor, plus a separate env for evaluation.
    envs = [common.make_env() for _ in range(common.NUM_ENVS)]
    # NOTE(review): `if args.seed` treats seed 0 as "no seeding" — presumably
    # intentional, but verify if seed 0 should be a valid choice.
    if args.seed:
        common.set_seed(args.seed, envs, cuda=args.cuda)
        suffix = "-seed=%d" % args.seed
    else:
        suffix = ""
    test_env = common.make_env(test=True)
    writer = SummaryWriter(comment="-01_a2c_" + args.name + suffix)
    net = common.AtariA2C(envs[0].observation_space.shape, envs[0].action_space.n).to(device)
    print(net)
    # LEARNING_RATE is defined elsewhere in this file.
    optimizer = optim.RMSprop(net.parameters(), lr=LEARNING_RATE, eps=1e-5)
    # Training-loop bookkeeping.
    step_idx = 0
    total_steps = 0
    best_reward = None
    ts_start = time.time()
# --- Environment-model training setup and loop head ---
# Moves the policy net to the device, builds the environment model, and
# starts the EM training loop driven by experience collected from the A2C
# agent. NOTE: the loop body is truncated at the end of this chunk — the
# remainder continues past the visible source.
net = net.to(device)
config.A2CNET = str(net)  # record the net architecture in the config for logging
# net = common.AtariA2C(envs[0].observation_space.shape, envs[0].action_space.n)
net_em = models.environment_model.EnvironmentModel(
    envs[0].observation_space.shape, envs[0].action_space.n, config).to(device)
# net_em.load_state_dict(torch.load("/home/valy/OneDrive/experiments/repl/9_22/Jan19_20-40-19_valy_em_22_9_True/best_1.4249e-06_195121.dat", map_location=lambda storage, loc: storage))
config.EM_NET = str(net_em)  # record the EM architecture in the config for logging
print(net)
print(net_em)
print("em param count: " + str(common.count_parameters(net_em)))
# Sets the seed on torch operations and on all environments.
common.set_seed(seed=config.SEED, envs=envs)
optimizer = optim.Adam(net_em.parameters(), lr=config.LEARNING_RATE)
# Training bookkeeping; best_loss tracks the lowest EM loss seen so far.
epoch = 0
best_loss = np.inf
desc = ""
# Progress bar over the configured number of EM training steps; advanced
# manually via the `progress` iterator elsewhere in the loop.
pbar = trange(config.EM_STEPS, desc='', leave=True)
progress = iter(pbar)
with ptan.common.utils.TBMeanTracker(
        writer, batch_size=config.BATCH_SIZE) as tb_tracker:
    # Obtain batch transitions from the A2C model-free agent: (s_t, a_t, s_t+1, r).
    for mb_obs, mb_obs_next, mb_actions, mb_rewards, done_rewards, done_steps in collect_experience(
            envs, net, config, device):
        # done_rewards holds the total rewards of episodes that finished in
        # this batch (loop body continues beyond this chunk).
        if len(done_rewards) > 0: