예제 #1
0
    # Set-up for an I2A run driven by an ExperimentCfg object.
    # `parser` is created outside this excerpt; make_i2a_config presumably
    # reads the I2A settings from it -- confirm in experiment_config.
    config = experiment_config.ExperimentCfg()
    config.make_i2a_config(parser)

    device = torch.device(config.DEVICE)

    # The run name derived from the config is appended to the writer comment;
    # the writer's log directory is reused as the save location.
    writer = SummaryWriter(comment="_i2a_fc_" +
                           config.build_name_for_i2a_writer())
    saves_path = writer.logdir

    # config.NUM_ENVS training environments plus one separate test environment.
    envs = [
        common.makeCustomizedGridEnv(config) for _ in range(config.NUM_ENVS)
    ]
    test_env = common.makeCustomizedGridEnv(config)

    # Seed the training environments and the test environment with the same
    # config.SEED (set_seed presumably seeds torch as well -- confirm in common).
    common.set_seed(config.SEED, envs=envs)
    common.set_seed(config.SEED, envs=[test_env])

    # All environments are built the same way, so the first one supplies the
    # observation shape and the number of discrete actions.
    obs_shape = envs[0].observation_space.shape
    act_n = envs[0].action_space.n

    # Disabled alternative: net_policy = common.AtariA2C(obs_shape, act_n).to(device)
    net_policy = common.getNet(config)
    # Keep the textual description of the policy network on the config.
    config.A2CNET = str(net_policy)

    net_em = models.environment_model.EnvironmentModel(obs_shape, act_n,
                                                       config)
    # Disabled: loading pretrained environment-model weights
    # net_em.load_state_dict(torch.load(config.EM_FILE_NAME, map_location=lambda storage, loc: storage))
    net_em = net_em.to(device)
    # Keep the textual description of the environment model on the config.
    config.EM_NET = str(net_em)
    # Command-line interface: the run name (-n/--name) and the environment
    # model file (--em) are mandatory; CUDA is opt-in via --cuda.
    parser = argparse.ArgumentParser()
    parser.add_argument("-n", "--name", required=True, help="Name of the run")
    parser.add_argument("--cuda", default=False, action="store_true", help="Enable CUDA")
    parser.add_argument("--em", required=True, help="Environment model file name")
    parser.add_argument("--seed", type=int, default=common.DEFAULT_SEED, help="Random seed to use, default=%d" % common.DEFAULT_SEED)
    args = parser.parse_args()
    device = torch.device("cuda" if args.cuda else "cpu")

    # Directory saves/03_i2a_<name>; created if it does not exist yet.
    saves_path = os.path.join("saves", "03_i2a_" + args.name)
    os.makedirs(saves_path, exist_ok=True)

    # common.NUM_ENVS training environments plus one environment built with test=True.
    envs = [common.make_env() for _ in range(common.NUM_ENVS)]
    test_env = common.make_env(test=True)

    # A falsy seed (e.g. --seed 0) skips seeding entirely. Otherwise the seed
    # is applied to the training envs (test_env is not passed to set_seed)
    # and appended to the writer comment.
    if args.seed:
        common.set_seed(args.seed, envs, cuda=args.cuda)
        suffix = "-seed=%d" % args.seed
    else:
        suffix = ""
    writer = SummaryWriter(comment="-03_i2a_" + args.name + suffix)

    # All training environments come from the same factory, so the first one
    # supplies the observation shape and the number of discrete actions.
    obs_shape = envs[0].observation_space.shape
    act_n = envs[0].action_space.n

    net_policy = common.AtariA2C(obs_shape, act_n).to(device)

    # Load the environment-model weights from the --em file. The
    # map_location callable returns the storage unchanged, so tensors stay on
    # the CPU while loading; the model is moved to `device` afterwards.
    net_em = i2a.EnvironmentModel(obs_shape, act_n)
    net_em.load_state_dict(torch.load(args.em, map_location=lambda storage, loc: storage))
    net_em = net_em.to(device)

    # The I2A agent is built from the environment model and the policy net;
    # ROLLOUTS_STEPS is a constant defined outside this excerpt.
    net_i2a = i2a.I2A(obs_shape, act_n, net_em, net_policy, ROLLOUTS_STEPS).to(device)
예제 #3
0
if __name__ == "__main__":
    # Command-line interface of the A2C baseline run.
    parser = argparse.ArgumentParser()
    # NOTE: default=True combined with store_true means --cuda can never be
    # switched off from the command line. The declaration is kept as-is for
    # backward compatibility; availability is checked below instead.
    parser.add_argument("--cuda", default=True, action="store_true", help="Enable cuda")
    parser.add_argument("-n", "--name", required=True, help="Name of the run")
    parser.add_argument("--seed", type=int, default=common.DEFAULT_SEED, help="Random seed to use, default=%d" % common.DEFAULT_SEED)
    parser.add_argument("--steps", type=int, default=None, help="Limit of training steps, default=disabled")
    args = parser.parse_args()

    # Because --cuda is always True, fall back to the CPU when no CUDA device
    # is present instead of failing later when the network is moved to it.
    use_cuda = args.cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    print('device: ', device)

    # Directory saves/01_a2c_<name>; created if it does not exist yet.
    saves_path = os.path.join("saves", "01_a2c_" + args.name)
    os.makedirs(saves_path, exist_ok=True)

    envs = [common.make_env() for _ in range(common.NUM_ENVS)]
    # A falsy seed (e.g. --seed 0) skips seeding entirely. Otherwise the seed
    # is applied to the training envs and appended to the writer comment.
    if args.seed:
        common.set_seed(args.seed, envs, cuda=use_cuda)
        suffix = "-seed=%d" % args.seed
    else:
        suffix = ""

    # The test environment is created after seeding and is not passed to set_seed.
    test_env = common.make_env(test=True)
    writer = SummaryWriter(comment="-01_a2c_" + args.name + suffix)

    # The first training environment supplies the observation shape and the
    # number of discrete actions for the network.
    net = common.AtariA2C(envs[0].observation_space.shape, envs[0].action_space.n).to(device)
    print(net)
    optimizer = optim.RMSprop(net.parameters(), lr=LEARNING_RATE, eps=1e-5)

    # Bookkeeping initialised here for the code that follows this excerpt.
    step_idx = 0
    total_steps = 0
    best_reward = None
    ts_start = time.time()
예제 #4
0
    # Set-up for training the environment model (net_em). `net`, `envs`,
    # `config`, `device` and `writer` are created before this excerpt.
    net = net.to(device)
    # Keep the textual description of the agent network on the config.
    config.A2CNET = str(net)

    # Disabled alternative: net = common.AtariA2C(envs[0].observation_space.shape, envs[0].action_space.n)
    # The first environment supplies the observation shape and action count.
    net_em = models.environment_model.EnvironmentModel(
        envs[0].observation_space.shape, envs[0].action_space.n,
        config).to(device)
    # Disabled: loading pretrained environment-model weights from a local path
    #    net_em.load_state_dict(torch.load("/home/valy/OneDrive/experiments/repl/9_22/Jan19_20-40-19_valy_em_22_9_True/best_1.4249e-06_195121.dat", map_location=lambda storage, loc: storage))
    # Keep the textual description of the environment model on the config.
    config.EM_NET = str(net_em)

    print(net)
    print(net_em)
    print("em param count: " + str(common.count_parameters(net_em)))

    # Seed the environments with config.SEED (set_seed presumably seeds torch
    # as well -- confirm in common).
    common.set_seed(seed=config.SEED, envs=envs)

    # Only the environment model's parameters are handed to the optimizer;
    # `net` is not optimised here.
    optimizer = optim.Adam(net_em.parameters(), lr=config.LEARNING_RATE)

    epoch = 0
    best_loss = np.inf
    desc = ""
    # Progress bar over config.EM_STEPS steps. It is wrapped in an explicit
    # iterator, presumably so the loop below can advance it manually with
    # next() -- confirm against the code after this excerpt.
    pbar = trange(config.EM_STEPS, desc='', leave=True)
    progress = iter(pbar)

    with ptan.common.utils.TBMeanTracker(
            writer, batch_size=config.BATCH_SIZE) as tb_tracker:
        #obtain batch transitions from the a2c model free agent (st, at, st+1, r)
        for mb_obs, mb_obs_next, mb_actions, mb_rewards, done_rewards, done_steps in collect_experience(
                envs, net, config, device):
            if len(done_rewards) > 0: