Example 1
def get_agent(name, *args, **kwargs):
    if name == 'QLearningAgent':
        import qlearning
        return qlearning.QLearningAgent(*args, **kwargs)
    elif name == 'DQNAgent':
        import dqn
        return dqn.DQNAgent(*args, **kwargs)
    else:
        raise Exception('Invalid agent name.')
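
The if/elif dispatch above imports each framework module only when the corresponding agent is requested, so unused dependencies never get loaded. A minimal sketch of an equivalent registry-based factory, assuming the same qlearning and dqn modules referenced in the snippet (not shown here):

import importlib

_AGENTS = {
    'QLearningAgent': ('qlearning', 'QLearningAgent'),
    'DQNAgent': ('dqn', 'DQNAgent'),
}

def get_agent(name, *args, **kwargs):
    # Look up the (module, class) pair, then import lazily as above.
    try:
        module_name, class_name = _AGENTS[name]
    except KeyError:
        raise ValueError('Invalid agent name: {!r}'.format(name))
    module = importlib.import_module(module_name)
    return getattr(module, class_name)(*args, **kwargs)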
Example 2
def make_agent(args, input_shape, num_actions: int, output_dir: str):
    replay_memory = ReplayMemory(max_size=args.replay_buffer_size,
                                 batch_size=args.batch_size)
    exploration_strategy = EpsilonGreedyStrategy(
        epsilon_max=args.epsilon_max,
        epsilon_min=args.epsilon_min,
        epsilon_decay=args.epsilon_decay,
    )

    hyper_parameters = dqn.HyperParameters(args.learning_rate, args.gamma)

    if args.dueling:
        dqn_class = dqn.make_dqn_dueling
    else:
        dqn_class = dqn.make_dqn

    if args.double:
        agent = dqn.DoubleDQNAgent(
            target_dqn=dqn_class(
                input_shape=input_shape,
                hidden_dim=args.hidden_dim,
                num_actions=num_actions,
            ),
            target_update_rate=args.target_update_rate,
            dqn=dqn_class(
                input_shape=input_shape,
                hidden_dim=args.hidden_dim,
                num_actions=num_actions,
            ),
            replay_memory=replay_memory,
            exploration_strategy=exploration_strategy,
            hyper_parameters=hyper_parameters,
            num_actions=num_actions,
            output_dir=output_dir,
        )
    else:
        agent = dqn.DQNAgent(
            dqn=dqn_class(
                input_shape=input_shape,
                hidden_dim=args.hidden_dim,
                num_actions=num_actions,
            ),
            replay_memory=replay_memory,
            exploration_strategy=exploration_strategy,
            hyper_parameters=hyper_parameters,
            num_actions=num_actions,
            output_dir=output_dir,
        )
    return agent
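
make_agent only reads attributes from args, so any namespace with the right fields will do. A hedged sketch of argparse wiring that would produce those fields; the flag names and defaults are assumptions inferred from the attributes accessed above, not taken from the source repo:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--replay-buffer-size', type=int, default=100000)
parser.add_argument('--batch-size', type=int, default=32)
parser.add_argument('--epsilon-max', type=float, default=1.0)
parser.add_argument('--epsilon-min', type=float, default=0.01)
parser.add_argument('--epsilon-decay', type=float, default=0.995)
parser.add_argument('--learning-rate', type=float, default=1e-3)
parser.add_argument('--gamma', type=float, default=0.99)
parser.add_argument('--hidden-dim', type=int, default=128)
parser.add_argument('--dueling', action='store_true')
parser.add_argument('--double', action='store_true')
parser.add_argument('--target-update-rate', type=int, default=1000)
args = parser.parse_args()
# args would then be passed straight to make_agent(args, input_shape, num_actions, output_dir)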
Example 3
def main():
    env = gym.make('CartPole-v1')
    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n

    learning_rate = 1e-3
    model = build_model(state_size, action_size, learning_rate)
    agent = dqn.DQNAgent(model, state_size, action_size)

    agent.fit()

    env = gym.wrappers.Monitor(env,
                               "./video",
                               video_callable=lambda episode_id: True,
                               force=True)
    for _ in range(10):
        agent.play(env)
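
build_model is not included in the snippet; a hypothetical Keras stand-in consistent with its (state_size, action_size, learning_rate) signature might look like the following (an assumption, not the repo's implementation). Note also that gym.wrappers.Monitor, used above for recording, has been replaced by gym.wrappers.RecordVideo in newer gym releases.

from tensorflow import keras

def build_model(state_size, action_size, learning_rate):
    # Small fully connected Q-network: state in, one Q-value per action out.
    model = keras.Sequential([
        keras.layers.Input(shape=(state_size,)),
        keras.layers.Dense(24, activation='relu'),
        keras.layers.Dense(24, activation='relu'),
        keras.layers.Dense(action_size, activation='linear'),
    ])
    model.compile(loss='mse',
                  optimizer=keras.optimizers.Adam(learning_rate=learning_rate))
    return model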
Example 4

env = gym.make('CartPole-v0')
exprep = exp_replay.ExpReplay(mem_size=MEM_SIZE,
                              start_mem=START_MEM,
                              state_size=STATE_SIZE,
                              kth=-1,
                              batch_size=BATCH_SIZE)

sess = tf.Session()
with tf.device('/{}:0'.format(DEVICE)):
    agent = dqn.DQNAgent(session=sess,
                         epsilon=EPSILON,
                         epsilon_anneal=EPSILON_DECAY,
                         end_epsilon=END_EPSILON,
                         lr=LEARNING_RATE,
                         gamma=DISCOUNT_FACTOR,
                         state_size=4,
                         action_size=len(ACTIONS),
                         n_hidden_1=10,
                         n_hidden_2=10)

sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
if os.path.isdir(MODEL_DIR):
    saver.restore(sess, MODEL_PATH)
    agent.epsilon = agent.end_epsilon
    print('restored model')
    if TRAIN:
        exprep = pickle.load(open(MEMORY_PATH, "rb"))
        history = [e_length for e_length in train(agent, exprep, env)]
        saver.save(sess, MODEL_PATH)
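
The agent's epsilon schedule is not shown in this snippet; given the epsilon, epsilon_anneal, and end_epsilon arguments above, a plausible linear-annealing sketch (an assumption about the repo's behaviour, not its actual code) is:

def anneal_epsilon(epsilon, epsilon_anneal, end_epsilon):
    # Step epsilon down by a fixed amount, never dropping below the floor.
    return max(end_epsilon, epsilon - epsilon_anneal)

eps = 1.0
for _ in range(200):
    eps = anneal_epsilon(eps, epsilon_anneal=0.01, end_epsilon=0.1)
# eps is now clamped at end_epsilon = 0.1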
Example 5
    def __init__(self, state_size, action_size):
        self.agent = D.DQNAgent(state_size, action_size)
        self.N = state_size
        self.reward = []
Example 6
        q_net.cuda()
        agent = dqn.DistributionalDQNAgent(q_net, args.double_dqn,
                                           train_env.num_actions,
                                           args.num_atoms, -10, 10)
    else:
        if args.dueling:
            q_net_builder = model.build_dueling_network
        else:
            q_net_builder = model.build_basic_network

        q_net = q_net_builder(args.num_frames, args.frame_size,
                              train_env.num_actions, args.noisy_net,
                              args.sigma0, args.net)

        q_net.cuda()
        agent = dqn.DQNAgent(q_net, args.double_dqn, train_env.num_actions)

    if args.noisy_net:
        train_policy = GreedyEpsilonPolicy(0, agent)
    else:
        train_policy = LinearDecayGreedyEpsilonPolicy(args.train_start_eps,
                                                      args.train_final_eps,
                                                      args.train_eps_num_steps,
                                                      agent)

    eval_policy = GreedyEpsilonPolicy(args.eval_eps, agent)
    replay_memory = ReplayMemory(args.replay_buffer_size)
    replay_memory.burn_in(train_env, agent, args.burn_in_frames)

    evaluator = lambda logger: evaluate(eval_env, eval_policy, 10, logger)
    train(agent, train_env, train_policy, replay_memory, args.gamma,
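
The (args.num_atoms, -10, 10) arguments passed to DistributionalDQNAgent look like the atom count and support bounds of a C51-style categorical value distribution. A minimal PyTorch sketch of that support, with 51 atoms standing in for args.num_atoms (an illustration, not the repo's code):

import torch

num_atoms, v_min, v_max = 51, -10.0, 10.0
support = torch.linspace(v_min, v_max, num_atoms)   # fixed return atoms z_i
delta_z = (v_max - v_min) / (num_atoms - 1)         # spacing between atoms
probs = torch.full((num_atoms,), 1.0 / num_atoms)   # uniform pmf over atoms
expected_q = (probs * support).sum()                # Q(s, a) = sum_i p_i * z_i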
Example 7
        agent.epsilon_decay()
        print("当前的epsilon为{}".format(agent.epsilon))
        agent.learn(buffer=buffer, num_steps=128, batch_size=256)
        if i % 20 == 0 and i > 0:
            saver.save(sess, save_path)
            print("save model successfully!")


if __name__ == "__main__":
    env = SnakeEnv(gameSpeed=5, train_model=True)
    save_path = "./snake/model"
    # ob = env.reset()
    # print(ob,type(ob),ob.shape)
    buffer = ReplayBuffer(buffer_size=8192)
    sess = tf.Session()
    agent = dqn.DQNAgent(sess=sess,
                         epsilon=0.9,
                         epsilon_anneal=0.01,
                         end_epsilon=0.1,
                         lr=0.001,
                         gamma=0.9,
                         state_size=3,
                         action_size=4,
                         name_scope="dqn")
    sess.run(tf.global_variables_initializer())
    train(agent=agent,
          buffer=buffer,
          env=env,
          num_episodes=10000,
          max_steps=100,
          save_path=save_path)
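
ReplayBuffer itself is not part of the snippet; a deque-backed sketch compatible with the ReplayBuffer(buffer_size=8192) call above (the add/sample method names are assumptions, since agent.learn only receives the buffer object):

import random
from collections import deque

class ReplayBuffer:
    def __init__(self, buffer_size):
        self.buffer = deque(maxlen=buffer_size)   # oldest transitions drop off

    def add(self, state, action, reward, next_state, done):
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        # Uniform sampling without replacement, capped at the current size.
        return random.sample(self.buffer, min(batch_size, len(self.buffer)))

    def __len__(self):
        return len(self.buffer)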
Example 8
import dqn as dqn
import gym
import numpy as np

episodes = 500
EPISODES = 100
batch_size = 32
TRAIN_MODE = True
TEST_MODE = False

if __name__ == "__main__":
    env = gym.make('CartPole-v0')
    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n
    print(state_size, action_size)
    agent = dqn.DQNAgent(state_size, action_size)
    if TRAIN_MODE:
        # Iterate over the game episodes
        for e in range(EPISODES):

            # reset the state at the beginning of each game
            state = env.reset()
            state = np.reshape(state, [1, 4])

            # time_t represents each frame of the game
            for time_t in range(500):
                # turn this on if you want to render
                # env.render()
                # Decide action
                action = agent.act(state)
                # Advance the game to the next frame based on the action.