Code example #1 (score: 0)
File: 03_i2a.py — Project: mecha2k/rl_handson
    # --- Fragment: setup section of an I2A training script (enclosing main() not
    # visible here; saves_path, args, device, and constants are defined above). ---
    os.makedirs(saves_path, exist_ok=True)

    # Pool of parallel training environments plus a separate evaluation env.
    envs = [common.make_env() for _ in range(common.NUM_ENVS)]
    test_env = common.make_env(test=True)

    # Optional reproducibility; the seed is also appended to the TensorBoard
    # run name so seeded runs are distinguishable.
    # NOTE(review): a seed of 0 is falsy, so `--seed 0` is silently ignored here.
    if args.seed:
        common.set_seed(args.seed, envs, cuda=args.cuda)
        suffix = "-seed=%d" % args.seed
    else:
        suffix = ""
    writer = SummaryWriter(comment="-03_i2a_" + args.name + suffix)

    obs_shape = envs[0].observation_space.shape
    act_n = envs[0].action_space.n

    # Model-free A2C policy net, used as the policy component inside I2A.
    net_policy = common.AtariA2C(obs_shape, act_n).to(device)

    # Pretrained environment model: weights loaded from --em onto CPU first
    # (map_location keeps tensors where they are), then moved to the device.
    net_em = i2a.EnvironmentModel(obs_shape, act_n)
    net_em.load_state_dict(
        torch.load(args.em, map_location=lambda storage, loc: storage))
    net_em = net_em.to(device)

    net_i2a = i2a.I2A(obs_shape, act_n, net_em, net_policy,
                      ROLLOUTS_STEPS).to(device)
    print(net_i2a)

    # Smoke-test forward pass on a single initial observation; the result
    # itself is unused — this just verifies shapes/wiring before training.
    obs = envs[0].reset()
    obs_v = ptan.agent.default_states_preprocessor([obs]).to(device)
    res = net_i2a(obs_v)

    optimizer = optim.RMSprop(net_i2a.parameters(), lr=LEARNING_RATE, eps=1e-5)
Code example #2 (score: 0)
    # --- Fragment: setup + start of the training loop of an A2C script
    # (enclosing main() not visible; args and constants are defined above). ---
    device = torch.device("cuda" if args.cuda else "cpu")

    # Checkpoints are written under saves/01_a2c_<name>.
    saves_path = os.path.join("saves", "01_a2c_" + args.name)
    os.makedirs(saves_path, exist_ok=True)

    envs = [common.make_env() for _ in range(common.NUM_ENVS)]
    # NOTE(review): as in the other examples, `--seed 0` is falsy and skipped.
    if args.seed:
        common.set_seed(args.seed, envs, cuda=args.cuda)
        suffix = "-seed=%d" % args.seed
    else:
        suffix = ""

    test_env = common.make_env(test=True)
    writer = SummaryWriter(comment="-01_a2c_" + args.name + suffix)

    net = common.AtariA2C(envs[0].observation_space.shape,
                          envs[0].action_space.n).to(device)
    print(net)
    optimizer = optim.RMSprop(net.parameters(), lr=LEARNING_RATE, eps=1e-5)

    # Training-loop bookkeeping.
    step_idx = 0
    total_steps = 0
    best_reward = None
    ts_start = time.time()
    best_test_reward = None
    # TBMeanTracker averages scalar metrics over 100 values before logging.
    # (Fragment ends mid-statement: the tuple-unpacking target list of the
    # for-loop is truncated below.)
    with ptan.common.utils.TBMeanTracker(writer, batch_size=100) as tb_tracker:
        for (
                mb_obs,
                mb_rewards,
                mb_actions,
                mb_values,
                _,
Code example #3 (score: 0)
File: play.py — Project: mecha2k/rl_handson
    # --- Fragment: play/record script — loads a trained A2C net and runs it
    # in Breakout (enclosing main() and earlier parser setup not visible). ---
    parser.add_argument("--seed", type=int, default=0, help="Random seed")
    args = parser.parse_args()
    device = torch.device("cuda" if args.cuda else "cpu")

    # Seed torch and numpy for deterministic playback.
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # DQN-style wrapped Breakout: frame stacking enabled, but episodic-life
    # and reward clipping disabled for evaluation.
    make_env = lambda: ptan.common.wrappers.wrap_dqn(
        gym.make("BreakoutNoFrameskip-v4"),
        stack_frames=common.FRAMES_COUNT,
        episodic_life=False,
        reward_clipping=False,
    )
    env = make_env()
    # Records video of the run into args.write.
    # NOTE(review): gym.wrappers.Monitor is removed in newer Gym releases
    # (replaced by RecordVideo) — confirm the pinned gym version supports it.
    env = gym.wrappers.Monitor(env, args.write)
    net = common.AtariA2C(env.observation_space.shape, env.action_space.n)
    # Load weights onto CPU (map_location), then optionally move net to GPU.
    net.load_state_dict(
        torch.load(args.model, map_location=lambda storage, loc: storage))
    if args.cuda:
        net.cuda()

    # Sample actions from the policy's probability distribution.
    act_selector = ptan.actions.ProbabilityActionSelector()

    obs = env.reset()
    total_reward = 0.0
    total_steps = 0

    # Play loop (fragment: loop body is truncated below).
    while True:
        obs_v = ptan.agent.default_states_preprocessor([obs]).to(device)
        logits_v, values_v = net(obs_v)
        # NOTE(review): F.softmax without dim= is deprecated and relies on the
        # implicit-dim fallback; should be F.softmax(logits_v, dim=1).
        probs_v = F.softmax(logits_v)