Example #1
        model.add(Dropout(dropout))
    # Linear output layer: one Q-value per output unit
    model.add(Dense(output_states))
    # Adam optimizer; the Q-network is trained with a mean-squared-error loss
    adam = Adam(lr=0.003, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    model.compile(loss='mean_squared_error', optimizer=adam)
    return model


# Online Q-network and target network with identical architectures
q_nn = build_network(4, 6, 1, 32, "relu", 0.0)
target_nn = build_network(4, 6, 1, 32, "relu", 0.0)

# Start both networks from the same weights
target_nn.set_weights(q_nn.get_weights())

# Replay memory (experience buffer)
replay1 = replay_memory_agent(4, 10000)

# DQN agent that trains the Q-network against the target network
dqn_controller = deep_q_agent(action_value_model=q_nn,
                              target_model=target_nn,
                              states_len=4,
                              replay_memory=replay1)

env = gym.make("CartPole-v0")

check_save = -100
# Bookkeeping: average reward per episode
avg_reward_episodes = []
# Global time step
gt = 0
# Initial exploration rate for the epsilon-greedy policy
epsilon = 1.0
for episodes in range(0, 8000):

    # Initial State
    state = env.reset()
    done_1 = False
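
The snippet above begins inside build_network, so the start of the function is not shown. A minimal sketch of a definition consistent with the visible tail and with the call build_network(4, 6, 1, 32, "relu", 0.0) is given below; the parameter names and layer layout are assumptions and may differ from the original source.

# Hypothetical reconstruction of the truncated build_network definition;
# parameter names and hidden-layer layout are assumptions inferred from the
# call site and the visible tail of the function.
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam


def build_network(input_states, output_states, hidden_layers, neuron_count,
                  activation_function, dropout):
    model = Sequential()
    # First hidden layer takes the state vector as input
    model.add(Dense(neuron_count, input_dim=input_states,
                    activation=activation_function))
    model.add(Dropout(dropout))
    # Additional hidden layers, each followed by dropout
    for _ in range(hidden_layers):
        model.add(Dense(neuron_count, activation=activation_function))
        model.add(Dropout(dropout))
    # Linear output layer: one Q-value per output unit
    model.add(Dense(output_states))
    adam = Adam(lr=0.003, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    model.compile(loss='mean_squared_error', optimizer=adam)
    return model
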
Example #2
import gym
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

np.random.seed(42)

# Make the environment
env = gym.make("CartPole-v0")
# TensorFlow session
sess = tf.Session()
# Initialize the action-value and target networks
ac_function = network(4, 2, sess)
target_function = network(4, 2, sess)
# Make sure both networks start from the same weights
target_function.set_weights(ac_function.get_weights())
# Replay memory
replay = replay_memory_agent(4, 10000)
# Deep Q-learning agent combining the networks, replay memory, and policy
prof_x = deep_q_agent(ac_function, target_function, 4, replay.replay_memory,
                      epsi_greedy)
# Initial state
state = env.reset()
# Bookkeeping
done = False
episodes = 0
reward_episode = []
reward_track = []
# Initial exploration rate for the epsilon-greedy policy
epsilon = 1.0
while episodes < 8000:
    # Pick an action with the epsilon-greedy policy
    state = np.asarray(state)
    state = state.reshape(1, 4)
    q_values = ac_function.predict_on_batch(state)
    action = epsi_greedy([0, 1], q_values, epsilon)
    # Apply the action in the environment
    state_new, reward, done, _ = env.step(action)
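
The excerpt of Example #2 ends inside the training loop and does not show the epsi_greedy helper it relies on. A minimal sketch of an epsilon-greedy action selector consistent with the call epsi_greedy([0, 1], q_values, epsilon) is given below; the exact implementation in the original source may differ.

import numpy as np


def epsi_greedy(action_space, q_values, epsilon):
    # Hypothetical sketch: with probability epsilon pick a random action
    # (explore), otherwise pick the action with the largest Q-value (exploit)
    if np.random.rand() < epsilon:
        return int(np.random.choice(action_space))
    return int(action_space[int(np.argmax(q_values))])

In a typical DQN setup, epsilon starts at 1.0 (as above) and is decayed toward a small floor as training progresses, so the agent gradually shifts from exploration to exploitation.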