def train_dqn():
    args = DQNArgs()
    env = gym.make(args.env_name)
    agent = DQNAgent(env, QNet, SimpleNormalizer, args)
    pre_best = -1e9
    for ep in range(args.max_ep):
        agent.train_one_episode()
        if ep % args.test_interval == 0:
            r = agent.test_model()
            if r > pre_best:
                pre_best = r
                agent.save(args.save_dir)
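# agent.train_one_episode() and agent.test_model() are not shown above. The sketch
# below is a hypothetical version of the per-episode training step for a generic DQN
# agent (epsilon-greedy action selection, a replay buffer, one minibatch update per
# step); the attribute names epsilon, memory, q_values and replay are assumptions,
# not this DQNAgent's actual interface.
import numpy as np

def train_one_episode(agent, env, batch_size=32):
    state = env.reset()
    done = False
    episode_reward = 0.0
    while not done:
        # explore with probability epsilon, otherwise act greedily on the Q-values
        if np.random.rand() < agent.epsilon:
            action = env.action_space.sample()
        else:
            action = int(np.argmax(agent.q_values(state)))
        next_state, reward, done, _ = env.step(action)
        agent.memory.append((state, action, reward, next_state, done))
        if len(agent.memory) >= batch_size:
            agent.replay(batch_size)  # one gradient step on a sampled minibatch
        state = next_state
        episode_reward += reward
    return episode_reward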
        # inside the per-step simulation loop
        if len(agent.memory) >= batch_size:
            loss_sim_list.append(agent.replay(batch_size))

        # For data visualisation
        i.append(mdp.s[0, -1])
        v.append(mdp.s[1, -1])
        r.append(mdp.reward)

    # end of the episode: average the replay losses over the simulation
    loss_over_simulation_time = np.sum(np.array(loss_sim_list)) / len(loss_sim_list)
    loss_of_episode.append(loss_over_simulation_time)
    print("Initial Heading : {}".format(hdg0_rand))
    print("----------------------------")
    print("episode: {}/{}, Mean Loss = {}".format(e, EPISODES, loss_over_simulation_time))
    print("----------------------------")

# after all episodes: save the network and plot the second half of the run
agent.save("../Networks/dqn-test")

# plt.semilogy(np.linspace(1, EPISODES, EPISODES), np.array(loss_of_episode))
# plt.xlabel("Episodes")
# plt.ylabel("Cost")
f, axarr = plt.subplots(4, sharex=True)
axarr[0].plot(np.array(i[floor(len(i) / 2):len(i) - 1]) / TORAD)
axarr[1].plot(v[floor(len(i) / 2):len(i) - 1])
axarr[2].plot(r[floor(len(i) / 2):len(i) - 1])
axarr[3].semilogy(loss_sim_list[floor(len(i) / 2):len(i) - 1])
axarr[0].set_ylabel("angle of attack")
axarr[1].set_ylabel("v")
axarr[2].set_ylabel("r")
axarr[3].set_ylabel("cost")
plt.show()
state_size = 3
action_size = 9
actions = [[[0, 0], [-100, -100]],
           [[0, 0], [-100, 0]],
           [[0, 0], [-100, 100]],
           [[0, 0], [0, -100]],
           [[0, 0], [0, 0]],
           [[0, 0], [0, 100]],
           [[0, 0], [100, -100]],
           [[0, 0], [100, 0]],
           [[0, 0], [100, 100]]]

env = MyEnvironment()
agent = DQNAgent(state_size, action_size)
agent.load("./save/example_dqn.h5")  # load
batch_size = 32

for e in range(3000):
    state = np.reshape(env.reset(), [1, state_size])
    last_reward = 0
    for time in range(1000):
        env.render()  # render
        action = agent.act_2(state)
        commands = actions[action]
        next_state, reward, done, _ = env.step2(commands)
        next_state = np.reshape(next_state, [1, state_size])
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        if len(agent.memory) > batch_size:
            agent.replay(batch_size)
    if e % 1 == 0:
        agent.save("./save/example_dqn.h5")
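# The training loops above (and below) call agent.replay(batch_size) but never show it.
# A common Keras-style implementation is sketched here under assumed attribute names
# (model, memory, gamma, epsilon, epsilon_min, epsilon_decay); it is not necessarily
# what these DQNAgent classes actually do. It samples a minibatch from the replay
# memory, fits the Q-network toward the one-step TD targets, decays epsilon, and
# returns the mean training loss (so it can be appended to a loss list as in the
# snippet further up).
import random
import numpy as np

def dqn_replay(agent, batch_size):
    minibatch = random.sample(agent.memory, batch_size)
    losses = []
    for state, action, reward, next_state, done in minibatch:
        target = reward
        if not done:
            # bootstrap the target from the best next-state Q-value
            target = reward + agent.gamma * np.amax(agent.model.predict(next_state)[0])
        target_f = agent.model.predict(state)
        target_f[0][action] = target
        history = agent.model.fit(state, target_f, epochs=1, verbose=0)
        losses.append(history.history["loss"][0])
    # decay exploration after each replay step
    if agent.epsilon > agent.epsilon_min:
        agent.epsilon *= agent.epsilon_decay
    return float(np.mean(losses))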
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        # reward = reward if not done else -10
        next_state = np.reshape(next_state, [1, state_size])
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        # print(action, reward)
        if done:
            print("episode: {}/{}, score: {}, e: {:.5}".format(
                e, EPISODES, time, agent.epsilon))
            break
        if len(agent.memory) > batch_size:
            agent.replay(batch_size)
    if e % 10 == 0:
        save_string = './save/' + stock_name + '_weights_with_fees.h5'
        agent.save(save_string)

# # serialize model to JSON
# model_json = model.to_json()
# with open("model.json", "w") as json_file:
#     json_file.write(model_json)
# # serialize weights to HDF5
# model.save_weights("model.h5")
# print("Saved model to disk")
# # later...
# # load json and create model
# json_file = open('model.json', 'r')
# loaded_model_json = json_file.read()
# json_file.close()
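# The commented-out block above stops after reading model.json back in. For reference,
# the usual Keras pattern (assuming the same file names as above) continues by
# rebuilding the model from the JSON and loading the HDF5 weights:
# from keras.models import model_from_json
# loaded_model = model_from_json(loaded_model_json)
# loaded_model.load_weights("model.h5")
# print("Loaded model from disk")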
        next_state = np.reshape(next_state, [1, state_size])
        if done:
            reward = 2000
        if reward <= last_reward and not done:
            last_reward = reward
            reward = -1000
        else:
            last_reward = reward
        if TRAINING:
            agent.remember(state, action, reward, next_state, done)
        # advance the state only after the transition has been stored
        state = next_state
        if done:
            print("1;{};{};{:.2f};{:.2}".format(e, EPISODES, reward, agent.epsilon))
            if TRAINING:
                agent.replay(len(agent.memory))
            break
        if len(agent.memory) > batch_size and TRAINING:
            agent.replay(batch_size)
        if time == MOVES - 1:
            print("0;{};{};{:.2f};{:.2}".format(e, EPISODES, last_reward, agent.epsilon))
    if TRAINING:
        agent.save("./save/execution1.h5")
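# agent.act(state) / agent.act_2(state) and agent.epsilon appear throughout these
# snippets, but the action-selection code itself is never shown. A typical
# epsilon-greedy implementation looks like the sketch below; the attribute names
# action_size and model are assumptions, not necessarily this DQNAgent's interface.
import random
import numpy as np

def act(agent, state):
    # with probability epsilon take a random action (exploration) ...
    if np.random.rand() <= agent.epsilon:
        return random.randrange(agent.action_size)
    # ... otherwise take the action with the highest predicted Q-value (exploitation)
    q_values = agent.model.predict(state)
    return int(np.argmax(q_values[0]))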