Example #1
def train_dqn():
    args = DQNArgs()
    env = gym.make(args.env_name)
    agent = DQNAgent(env, QNet, SimpleNormalizer, args)
    pre_best = -1e9  # best evaluation return seen so far
    for ep in range(args.max_ep):
        agent.train_one_episode()
        # Evaluate periodically and checkpoint whenever the score improves.
        if ep % args.test_interval == 0:
            r = agent.test_model()
            if r > pre_best:
                pre_best = r
                agent.save(args.save_dir)
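
The `DQNArgs` container is not shown in the snippet. Below is a minimal sketch of the fields the loop actually reads; the default values are assumptions, and only the four attribute names come from the code above:

from dataclasses import dataclass

@dataclass
class DQNArgs:
    # Defaults are hypothetical; only the attribute names appear in the example.
    env_name: str = "CartPole-v1"
    max_ep: int = 1000
    test_interval: int = 10
    save_dir: str = "./checkpoints"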
Example #2
        if len(agent.memory) >= batch_size:
            loss_sim_list.append(agent.replay(batch_size))
            # For data visualisation
            i.append(mdp.s[0, -1])
            v.append(mdp.s[1, -1])
            r.append(mdp.reward)

    loss_over_simulation_time = np.mean(loss_sim_list)  # mean replay loss over this episode
    loss_of_episode.append(loss_over_simulation_time)
    print("Initial Heading : {}".format(hdg0_rand))
    print("----------------------------")
    print("episode: {}/{}, Mean Loss = {}".format(e, EPISODES,
                                                  loss_over_simulation_time))
    print("----------------------------")
agent.save("../Networks/dqn-test")

# plt.semilogy(np.linspace(1, EPISODES, EPISODES), np.array(loss_of_episode))
# plt.xlabel("Episodes")
# plt.ylabel("Cost")

half = len(i) // 2  # plot only the second half of the simulation
f, axarr = plt.subplots(4, sharex=True)
axarr[0].plot(np.array(i[half:-1]) / TORAD)
axarr[1].plot(v[half:-1])
axarr[2].plot(r[half:-1])
axarr[3].semilogy(loss_sim_list[half:-1])
axarr[0].set_ylabel("angle of attack")
axarr[1].set_ylabel("v")
axarr[2].set_ylabel("r")
axarr[3].set_ylabel("cost")
plt.show()
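
The snippet above treats the return value of `agent.replay(batch_size)` as a training loss, but the method body is not shown. Here is a minimal sketch of such a replay step in the common Keras style, assuming the agent stores `(state, action, reward, next_state, done)` tuples in `self.memory`, holds a compiled Q-network in `self.model`, and carries `gamma`/`epsilon` attributes with the usual meanings (all of these names are assumptions):

import random
import numpy as np

def replay(self, batch_size):
    # Sample a minibatch of stored transitions from the replay buffer.
    minibatch = random.sample(self.memory, batch_size)
    losses = []
    for state, action, reward, next_state, done in minibatch:
        # Bootstrap the target from the next state unless the episode ended.
        target = reward
        if not done:
            target = reward + self.gamma * np.amax(
                self.model.predict(next_state, verbose=0)[0])
        target_f = self.model.predict(state, verbose=0)
        target_f[0][action] = target
        history = self.model.fit(state, target_f, epochs=1, verbose=0)
        losses.append(history.history["loss"][0])
    # Decay exploration after each replay pass.
    if self.epsilon > self.epsilon_min:
        self.epsilon *= self.epsilon_decay
    return float(np.mean(losses))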
Example #3
state_size = 3
action_size = 9

# Nine discrete actions; each entry is a pair of two-element command vectors.
actions = [ [[0,0],[-100,-100]],    [[0,0],[-100,0]],   [[0,0],[-100,100]],
            [[0,0],[0,-100]],       [[0,0],[0,0]],      [[0,0],[0,100]],
            [[0,0],[100,-100]],     [[0,0],[100,0]],    [[0,0],[100,100]]]

env = MyEnvironment()
agent = DQNAgent(state_size, action_size)
agent.load("./save/example_dqn.h5")  # load previously saved weights
batch_size = 32
for e in range(3000):
    state = np.reshape(env.reset(), [1, state_size])
    last_reward = 0
    for time in range(1000):
        env.render()  # render the environment
        action = agent.act_2(state)
        commands = actions[action]
        next_state, reward, done, _ = env.step2(commands)
        next_state = np.reshape(next_state, [1, state_size])
        agent.remember(state, action, reward, next_state, done)
        state = next_state

        if len(agent.memory) > batch_size:
            agent.replay(batch_size)
    # Checkpoint after every episode.
    agent.save("./save/example_dqn.h5")
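
Examples #3 through #5 rely on a Keras-style `DQNAgent` with `remember`, `act`, `load`, and `save` methods that is not reproduced here. Below is a minimal sketch of that interface; the layer sizes and hyperparameters are illustrative, and `act_2` is assumed to be a purely greedy variant of `act`:

from collections import deque
import random
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam

class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)  # replay buffer
        self.gamma = 0.95                 # discount factor
        self.epsilon = 1.0                # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.model = Sequential([
            Input(shape=(state_size,)),
            Dense(24, activation="relu"),
            Dense(24, activation="relu"),
            Dense(action_size, activation="linear"),
        ])
        self.model.compile(loss="mse", optimizer=Adam(learning_rate=0.001))

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        # Epsilon-greedy action selection.
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        return int(np.argmax(self.model.predict(state, verbose=0)[0]))

    def act_2(self, state):
        # Assumed greedy variant: always exploit the learned Q-values.
        return int(np.argmax(self.model.predict(state, verbose=0)[0]))

    def load(self, name):
        self.model.load_weights(name)

    def save(self, name):
        self.model.save_weights(name)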
Example #4
                action = agent.act(state)
                next_state, reward, done, _ = env.step(action)
                # reward = reward if not done else -10
                next_state = np.reshape(next_state, [1, state_size])
                agent.remember(state, action, reward, next_state, done)
                state = next_state
                # print(action, reward)
                if done:
                    print("episode: {}/{}, score: {}, e: {:.5}".format(
                        e, EPISODES, time, agent.epsilon))
                    break
                if len(agent.memory) > batch_size:
                    agent.replay(batch_size)
            if e % 10 == 0:
                save_string = './save/' + stock_name + '_weights_with_fees.h5'
                agent.save(save_string)

# # serialize model to JSON
# model_json = model.to_json()
# with open("model.json", "w") as json_file:
#     json_file.write(model_json)
# # serialize weights to HDF5
# model.save_weights("model.h5")
# print("Saved model to disk")

# # later...

# # load json and create model
# json_file = open('model.json', 'r')
# loaded_model_json = json_file.read()
# json_file.close()
# loaded_model = model_from_json(loaded_model_json)
# # load weights into the new model
# loaded_model.load_weights("model.h5")
# print("Loaded model from disk")
Example #5
        next_state = np.reshape(next_state, [1, state_size])

        if done:
            reward = 2000

        # Penalise steps whose reward did not improve, unless the episode ended.
        if reward <= last_reward and not done:
            last_reward = reward
            reward = -1000
        else:
            last_reward = reward

        if TRAINING:
            agent.remember(state, action, reward, next_state, done)
        # Advance to the next state only after the transition has been stored,
        # so `remember` sees the state that actually produced the action.
        state = next_state

        if done:
            print("1;{};{};{:.2f};{:.2}".format(e, EPISODES, reward,
                                                agent.epsilon))
            if TRAINING:
                # Replay over the entire buffer once the episode ends.
                agent.replay(len(agent.memory))
            break
        if TRAINING and len(agent.memory) > batch_size:
            agent.replay(batch_size)
        if time == MOVES - 1:
            print("0;{};{};{:.2f};{:.2}".format(e, EPISODES, last_reward,
                                                agent.epsilon))

    if TRAINING:
        agent.save("./save/execution1.h5")
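
The prints above emit semicolon-separated records of the form `status;episode;total_episodes;reward;epsilon`. If those lines are redirected to a file, a small helper like the following can parse them back for analysis (the file name is hypothetical):

import csv

def load_run_log(path="run.log"):
    # Each record: status;episode;total_episodes;reward;epsilon
    records = []
    with open(path, newline="") as f:
        for status, episode, _total, reward, epsilon in csv.reader(f, delimiter=";"):
            records.append({
                "finished": status == "1",  # "1" was printed when done was True
                "episode": int(episode),
                "reward": float(reward),
                "epsilon": float(epsilon),
            })
    return records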