def test_piecewise_schedule():
    ps = PiecewiseSchedule([(-5, 100), (5, 200), (10, 50), (100, 50), (200, -50)],
                           outside_value=500)
    assert np.isclose(ps.value(-10), 500)
    assert np.isclose(ps.value(0), 150)
    assert np.isclose(ps.value(5), 200)
    assert np.isclose(ps.value(9), 80)
    assert np.isclose(ps.value(50), 50)
    assert np.isclose(ps.value(80), 50)
    assert np.isclose(ps.value(150), 0)
    assert np.isclose(ps.value(175), -25)
    assert np.isclose(ps.value(201), 500)
    assert np.isclose(ps.value(500), 500)
    assert np.isclose(ps.value(200 - 1e-10), -50)
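# For reference, a minimal sketch of the behavior the test above assumes:
# linear interpolation between consecutive (t, value) endpoints, with
# outside_value returned for any t outside the listed range. This hypothetical
# stand-in (SimplePiecewiseSchedule) only covers the linear case; the actual
# PiecewiseSchedule class used here may support other interpolation functions.
class SimplePiecewiseSchedule(object):
    def __init__(self, endpoints, outside_value=None):
        # Endpoints must already be sorted by time.
        assert all(t1 < t2 for (t1, _), (t2, _) in zip(endpoints, endpoints[1:]))
        self._endpoints = endpoints
        self._outside_value = outside_value

    def value(self, t):
        # Linearly interpolate on the segment containing t.
        for (l_t, l_v), (r_t, r_v) in zip(self._endpoints, self._endpoints[1:]):
            if l_t <= t < r_t:
                alpha = float(t - l_t) / (r_t - l_t)
                return l_v + alpha * (r_v - l_v)
        # t falls outside all segments.
        return self._outside_value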
# Create training graph and replay buffer
act, train, update_target, debug, craft_adv = deepq.build_train(
    make_obs_ph=lambda name: U.Uint8Input(env.observation_space.shape, name=name),
    q_func=dueling_model if args.dueling else model,
    num_actions=env.action_space.n,
    optimizer=tf.train.AdamOptimizer(learning_rate=args.lr, epsilon=1e-4),
    gamma=0.99,
    grad_norm_clipping=10,
    double_q=args.double_q,
    noisy=args.noisy,
    attack=args.attack
)
approximate_num_iters = args.num_steps / 4
exploration = PiecewiseSchedule([
    (0, 1.0),
    (approximate_num_iters / 50, 0.1),
    (approximate_num_iters / 5, 0.01)
], outside_value=0.01)

if args.prioritized:
    replay_buffer = PrioritizedReplayBuffer(args.replay_buffer_size, args.prioritized_alpha)
    beta_schedule = LinearSchedule(approximate_num_iters, initial_p=args.prioritized_beta0, final_p=1.0)
else:
    replay_buffer = ReplayBuffer(args.replay_buffer_size)

U.initialize()
update_target()
num_iters = 0
# Load the model
state = maybe_load_model(savedir, container)
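# A hypothetical sketch of how the pieces built above are typically wired
# together in a baselines-style training loop: the exploration schedule drives
# epsilon, transitions go into the replay buffer, train() runs on sampled
# batches, and update_target() periodically syncs the target network. The
# exact signatures of act()/train() returned by this modified build_train, the
# attack/craft_adv handling, and flag names such as args.learning_starts,
# args.learning_freq, args.batch_size, args.prioritized_eps, and
# args.target_update_freq are assumptions for illustration only.
obs = env.reset()
while num_iters < args.num_steps:
    num_iters += 1
    # Epsilon-greedy exploration rate follows the piecewise schedule defined above.
    action = act(np.array(obs)[None], update_eps=exploration.value(num_iters))[0]
    new_obs, rew, done, _ = env.step(action)
    replay_buffer.add(obs, action, rew, new_obs, float(done))
    obs = new_obs if not done else env.reset()

    if num_iters > args.learning_starts and num_iters % args.learning_freq == 0:
        if args.prioritized:
            experience = replay_buffer.sample(args.batch_size, beta=beta_schedule.value(num_iters))
            obses_t, actions, rewards, obses_tp1, dones, weights, batch_idxes = experience
        else:
            obses_t, actions, rewards, obses_tp1, dones = replay_buffer.sample(args.batch_size)
            weights = np.ones_like(rewards)
        td_errors = train(obses_t, actions, rewards, obses_tp1, dones, weights)
        if args.prioritized:
            # Re-prioritize sampled transitions by their absolute TD error.
            replay_buffer.update_priorities(batch_idxes, np.abs(td_errors) + args.prioritized_eps)

    if num_iters % args.target_update_freq == 0:
        update_target()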