Example #1
import numpy as np
# Assumed import path, as in OpenAI baselines' schedule utilities.
from baselines.common.schedules import PiecewiseSchedule


def test_piecewise_schedule():
    ps = PiecewiseSchedule([(-5, 100), (5, 200), (10, 50), (100, 50),
                            (200, -50)],
                           outside_value=500)

    assert np.isclose(ps.value(-10), 500)
    assert np.isclose(ps.value(0), 150)
    assert np.isclose(ps.value(5), 200)
    assert np.isclose(ps.value(9), 80)
    assert np.isclose(ps.value(50), 50)
    assert np.isclose(ps.value(80), 50)
    assert np.isclose(ps.value(150), 0)
    assert np.isclose(ps.value(175), -25)
    assert np.isclose(ps.value(201), 500)
    assert np.isclose(ps.value(500), 500)

    assert np.isclose(ps.value(200 - 1e-10), -50)
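
PiecewiseSchedule linearly interpolates between its (time, value) endpoints and returns outside_value for any query time outside the listed endpoints, which is exactly what the asserts above check. A rough sketch of that interpolation (an illustrative helper, not the library implementation):

def piecewise_value(endpoints, t, outside_value):
    # Walk consecutive endpoint pairs and interpolate inside the matching one.
    for (l_t, l_v), (r_t, r_v) in zip(endpoints[:-1], endpoints[1:]):
        if l_t <= t < r_t:
            alpha = float(t - l_t) / (r_t - l_t)
            return l_v + alpha * (r_v - l_v)
    return outside_value  # t is outside every endpoint interval

assert np.isclose(piecewise_value([(-5, 100), (5, 200)], 0, 500), 150)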
Example #2
        # Create training graph and replay buffer
        act, train, update_target, debug, craft_adv = deepq.build_train(
            make_obs_ph=lambda name: U.Uint8Input(env.observation_space.shape,
                                                  name=name),
            q_func=dueling_model if args.dueling else model,
            num_actions=env.action_space.n,
            optimizer=tf.train.AdamOptimizer(learning_rate=args.lr,
                                             epsilon=1e-4),
            gamma=0.99,
            grad_norm_clipping=10,
            double_q=args.double_q,
            noisy=args.noisy,
            attack=args.attack)
        approximate_num_iters = args.num_steps / 4
        # Anneal the exploration rate epsilon from 1.0 down to 0.01 over the
        # first fifth of training, then hold it at 0.01 (outside_value).
        exploration = PiecewiseSchedule([(0, 1.0),
                                         (approximate_num_iters / 50, 0.1),
                                         (approximate_num_iters / 5, 0.01)],
                                        outside_value=0.01)

        if args.prioritized:
            # Prioritized replay: sample transitions in proportion to their
            # TD error (exponent alpha) and anneal the importance-sampling
            # correction exponent beta from prioritized_beta0 toward 1.0.
            replay_buffer = PrioritizedReplayBuffer(args.replay_buffer_size,
                                                    args.prioritized_alpha)
            beta_schedule = LinearSchedule(approximate_num_iters,
                                           initial_p=args.prioritized_beta0,
                                           final_p=1.0)
        else:
            replay_buffer = ReplayBuffer(args.replay_buffer_size)

        U.initialize()
        update_target()
        num_iters = 0
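
LinearSchedule, used for the prioritized-replay beta above, anneals from initial_p to final_p over a fixed number of timesteps and then holds final_p. A behavioral sketch (illustrative, not the library source):

def linear_value(t, schedule_timesteps, initial_p, final_p):
    # Fraction of the schedule elapsed, clamped at 1.0 once t passes the end.
    fraction = min(float(t) / schedule_timesteps, 1.0)
    return initial_p + fraction * (final_p - initial_p)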
Example #3
        # Create training graph and replay buffer
        act, train, update_target, debug, craft_adv = deepq.build_train(
            make_obs_ph=lambda name: U.Uint8Input(env.observation_space.shape, name=name),
            q_func=dueling_model if args.dueling else model,
            num_actions=env.action_space.n,
            optimizer=tf.train.AdamOptimizer(learning_rate=args.lr, epsilon=1e-4),
            gamma=0.99,
            grad_norm_clipping=10,
            double_q=args.double_q,
            noisy=args.noisy,
            attack=args.attack
        )
        approximate_num_iters = args.num_steps / 4
        exploration = PiecewiseSchedule([
            (0, 1.0),
            (approximate_num_iters / 50, 0.1),
            (approximate_num_iters / 5, 0.01)
        ], outside_value=0.01)

        if args.prioritized:
            replay_buffer = PrioritizedReplayBuffer(args.replay_buffer_size, args.prioritized_alpha)
            beta_schedule = LinearSchedule(approximate_num_iters, initial_p=args.prioritized_beta0, final_p=1.0)
        else:
            replay_buffer = ReplayBuffer(args.replay_buffer_size)

        U.initialize()
        update_target()
        num_iters = 0

        # Load the model
        state = maybe_load_model(savedir, container)
Example #4
        # Create training graph and replay buffer
        act, train, update_target, debug, craft_adv = deepq.build_train(
            make_obs_ph=lambda name: U.Uint8Input(env.observation_space.shape,
                                                  name=name),
            q_func=dueling_model if args.dueling else model,
            num_actions=env.action_space.n,
            optimizer=tf.train.AdamOptimizer(learning_rate=args.lr,
                                             epsilon=1e-4),
            gamma=0.99,
            grad_norm_clipping=10,
            double_q=args.double_q,
            noisy=args.noisy,
            attack=args.attack
        )
        approximate_num_iters = args.num_steps / 4
        exploration = PiecewiseSchedule([
            (0, 1.0),
            (approximate_num_iters / 50, 0.1),
            (approximate_num_iters / 5, 0.01)
        ], outside_value=0.01)

        if args.prioritized:
            replay_buffer = PrioritizedReplayBuffer(args.replay_buffer_size,
                                                    args.prioritized_alpha)
            beta_schedule = LinearSchedule(approximate_num_iters,
                                           initial_p=args.prioritized_beta0,
                                           final_p=1.0)
        else:
            replay_buffer = ReplayBuffer(args.replay_buffer_size)

        U.initialize()
        update_target()
        num_iters = 0
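
Every snippet above stops right after initialization. The sketch below shows how the objects created there (act, train, update_target, exploration, replay_buffer, beta_schedule) are typically consumed in the training loop; the loop itself and the extra names (obs, args.batch_size, args.prioritized_eps, args.target_update_freq) are assumptions for illustration, not part of the examples, with call signatures following the standard baselines deepq API:

        obs = env.reset()
        while num_iters < args.num_steps:
            num_iters += 1
            # Act epsilon-greedily with the annealed exploration rate.
            action = act(np.array(obs)[None],
                         update_eps=exploration.value(num_iters))[0]
            new_obs, rew, done, _ = env.step(action)
            replay_buffer.add(obs, action, rew, new_obs, float(done))
            obs = env.reset() if done else new_obs

            if args.prioritized:
                # The importance-sampling exponent beta is annealed toward 1.0.
                (obses_t, actions, rewards, obses_tp1, dones, weights,
                 idxes) = replay_buffer.sample(
                     args.batch_size, beta=beta_schedule.value(num_iters))
            else:
                obses_t, actions, rewards, obses_tp1, dones = \
                    replay_buffer.sample(args.batch_size)
                weights = np.ones_like(rewards)

            td_errors = train(obses_t, actions, rewards, obses_tp1, dones, weights)
            if args.prioritized:
                # Refresh priorities with the new TD errors.
                replay_buffer.update_priorities(
                    idxes, np.abs(td_errors) + args.prioritized_eps)

            if num_iters % args.target_update_freq == 0:
                update_target()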