def test_dqn():
    args = DQNArgs()
    env = gym.make(args.env_name)
    agent = DQNAgent(env, QNet, SimpleNormalizer, args)
    agent.load(args.save_dir)
    for _ in range(10):
        agent.test_one_episode(True)
class DQNScheduler:
    def __init__(self, simulator):
        self.agent = DQNAgent(25, 6)
        self.agent.load("./save/car-100-dqn.h5")
        self.simulator = simulator
        self.agent.epsilon = 0  # greedy policy: no exploration at scheduling time

    def schedule(self):
        action = self.agent.act(np.reshape(self.simulator.get_state(), [1, 25]))
        return action
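# Minimal usage sketch for the scheduler above. Assumption: any object exposing
# get_state() that returns a 25-element state vector can serve as the simulator;
# DummySimulator below is hypothetical and only illustrates that interface.
class DummySimulator:
    def get_state(self):
        return np.zeros(25)  # placeholder state of the expected size


scheduler = DQNScheduler(DummySimulator())
action = scheduler.schedule()  # discrete action index from the loaded DQN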
def load_model(MODEL_TYPE):
    curr_model = None
    if MODEL_TYPE == "SVM":
        print("LOADING SVM...")
        curr_model = load("svm.joblib")
    elif MODEL_TYPE == "LR":
        print("LOADING LR...")
        lr = LogReg(74)  # (env.matches.shape[1])
        lr.load_weights("weights/weights-improvement-100-0.31.hdf5")
        curr_model = lr
    elif MODEL_TYPE == "DT":
        print("LOADING DT...")
        curr_model = load("dt.joblib")
    elif MODEL_TYPE == "GB":
        print("LOADING GB...")
        curr_model = load("gb.joblib")
    elif MODEL_TYPE == "RF":
        print("LOADING RF...")
        curr_model = load("rfc.joblib")
    elif MODEL_TYPE == "NB":
        print("LOADING NB...")
        curr_model = load("nb.joblib")
    elif MODEL_TYPE == "AB":
        print("LOADING AB...")
        curr_model = load("ab.joblib")
    elif MODEL_TYPE == "DQN":
        print("LOADING DQN...")
        BetNet = DQNAgent(75)
        BetNet.load("weights/betnet-weights-dqn.h5")
        curr_model = BetNet
    else:
        print("LOADING NN...")
        BetNet = Network(74)  # (env.matches.shape[1])
        BetNet.load_weights(
            'weights/Adadelta/test9_400_Best/weights-improvement-400-0.48.hdf5'
        )  # PCA("weights/Adadelta/test13_100iter_reglast2/weights-improvement-100-0.52.hdf5")  # Most recent weights
        curr_model = BetNet
    return curr_model
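# Usage sketch for load_model. Assumptions: `load` is joblib.load (the *.joblib
# files suggest joblib), and the hard-coded weight files exist on disk.
from joblib import load

curr_model = load_model("RF")   # fitted random-forest classifier
bet_net = load_model("DQN")     # DQNAgent with restored weights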
def simulateGustsControl(self):
    '''
    Simulate the response of the controller to gusts.

    :return: A plot of the simulation.
    '''
    self.sim_time = 100
    agent = DQNAgent(self.mdp.size, self.action_size)
    agent.load(self.src)
    WH = self.wh.generateWind()
    hdg0 = 0 * TORAD * np.ones(self.wh.samples)
    state = self.mdp.initializeMDP(hdg0, WH)
    i = np.ones(0)
    v = np.ones(0)
    wind_heading = np.ones(0)
    for time in range(self.sim_time):
        WH = self.wh.generateWind()
        if time == 20:
            WH = self.wh.generateGust(10 * TORAD)
        action = agent.actDeterministically(state)
        next_state, reward = self.mdp.transition(action, WH)
        state = next_state
        i = np.concatenate([i, self.mdp.extractSimulationData()[0, :]])
        v = np.concatenate([v, self.mdp.extractSimulationData()[1, :]])
        wind_heading = np.concatenate([wind_heading, WH[0:10]])
    time_vec = np.linspace(0, self.sim_time, int(self.sim_time / self.mdp.dt))
    f, axarr = plt.subplots(2, sharex=True)
    axarr[0].plot(time_vec, i / TORAD)
    axarr[1].plot(time_vec, v)
    axarr[0].set_ylabel("angle of attack")
    axarr[1].set_ylabel("v")
    plt.show()
def simulateDQNControl(self, hdg0):
    '''
    Plots the control law of the network over a simulation.

    :param hdg0: Initial heading of the boat for the simulation.
    :return: A plot of the angle of attack and velocity during the control.
    '''
    agent = DQNAgent(self.mdp.size, self.action_size)
    agent.load(self.src)
    WH = self.wh.generateWind()
    hdg0 = hdg0 * TORAD * np.ones(self.wh.samples)
    state = self.mdp.initializeMDP(hdg0, WH)
    i = np.ones(0)
    v = np.ones(0)
    wind_heading = np.ones(0)
    for time in range(self.sim_time):
        WH = self.wh.generateWind()
        action = agent.actDeterministically(state)
        next_state, reward = self.mdp.transition(action, WH)
        state = next_state
        i = np.concatenate([i, self.mdp.extractSimulationData()[0, :]])
        v = np.concatenate([v, self.mdp.extractSimulationData()[1, :]])
        wind_heading = np.concatenate([wind_heading, WH[0:10]])
    time_vec = np.linspace(0, self.sim_time, int(self.sim_time / self.mdp.dt))
    f, axarr = plt.subplots(2, sharex=True)
    axarr[0].plot(time_vec, i / TORAD)
    axarr[1].plot(time_vec, v)
    axarr[0].set_ylabel("i [°]")
    axarr[1].set_ylabel("v [m/s]")
    axarr[0].set_xlabel("t [s]")
    axarr[1].set_xlabel("t [s]")
    plt.show()
elif MODEL_TYPE == "GB":
    print("LOADING GB...")
    curr_model = load("gb.joblib")
elif MODEL_TYPE == "RF":
    print("LOADING RF...")
    curr_model = load("rfc.joblib")
elif MODEL_TYPE == "NB":
    print("LOADING NB...")
    curr_model = load("nb.joblib")
elif MODEL_TYPE == "AB":
    print("LOADING AB...")
    curr_model = load("ab.joblib")
elif MODEL_TYPE == "DQN":
    print("LOADING DQN...")
    BetNet = DQNAgent(75)
    BetNet.load("weights/betnet-weights-dqn.h5")
    curr_model = BetNet
else:
    print("LOADING NN...")
    BetNet = Network(env.matches.shape[1])
    BetNet.load_weights(
        "weights/Adadelta/test13_100iter_reglast2/weights-improvement-100-0.52.hdf5"
    )  # Most recent weights
    curr_model = BetNet


###############################################################################
# GETS THE PREDICTION VEC GIVEN MODEL
def generatePrediction(mt, curr_model, to_process):
    prediction = None
action_size = 9
actions = [[[0, 0], [-100, -100]], [[0, 0], [-100, 0]], [[0, 0], [-100, 100]],
           [[0, 0], [0, -100]], [[0, 0], [0, 0]], [[0, 0], [0, 100]],
           [[0, 0], [100, -100]], [[0, 0], [100, 0]], [[0, 0], [100, 100]]]
# actions = [
#     [[0,0],[0,0]],
#     [[0,0],[0,100]],
#     [[0,0],[100,0]],
#     [[0,0],[100,100]]
# ]

agent = DQNAgent(state_size, action_size)
# load
agent.load("./save/example_dqn.h5")
done = False
batch_size = 32

for e in range(EPISODES):
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    # print(e)
    last_reward = 0
    for time in range(1000):
        # delay.sleep(1/50)
        # render
        env.render()
        # action = agent.act(state)
        action = agent.act_2(state)
        commands = actions[action]
import gym
import numpy as np

from keras.optimizers import Adam
from dqn import DQNAgent
from keras.models import model_from_json
from keras.models import load_model

EPISODES = 100

if __name__ == "__main__":
    env = gym.make('CartPole-v1')
    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n
    agent = DQNAgent(state_size, action_size)
    # agent.epsilon = 0.01
    # agent.model = model_from_json(open('cartpole.json').read())
    agent.load('cp2.h5')

    state = env.reset()
    state = np.reshape(state, [1, state_size])
    done = False
    for t in range(10000):
        env.render()
        # action = agent.act(state)
        act_values = agent.model.predict(state)
        action = np.argmax(act_values[0])  # returns action
        next_state, reward, done, _ = env.step(action)
        reward = reward if not done else -10
        next_state = np.reshape(next_state, [1, state_size])
        state = next_state
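# Sketch of the greedy action selection used in the loop above, wrapped in a
# helper. `greedy_action` is hypothetical and not part of the DQNAgent API; it
# only assumes the agent exposes a Keras model via `agent.model`.
def greedy_action(agent, state):
    # pick the action with the highest predicted Q-value for this state
    q_values = agent.model.predict(state)
    return int(np.argmax(q_values[0]))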
state_size = 3
action_size = 9
maxForce = 300
actions = [[[0, 0], [-maxForce, -maxForce]], [[0, 0], [2 * -maxForce, 0]],
           [[0, 0], [-maxForce, maxForce]], [[0, 0], [0, 2 * -maxForce]],
           [[0, 0], [0, 0]], [[0, 0], [0, 2 * maxForce]],
           [[0, 0], [maxForce, -maxForce]], [[0, 0], [2 * maxForce, 0]],
           [[0, 0], [maxForce, maxForce]]]

agent = DQNAgent(state_size, action_size)
# load
agent.load("./save/execution1.h5")
env = MyEnvironment(ut=3 / 50)
batch_size = 100

print("done;episode;episodes;score;epsilon")
for e in range(EPISODES):
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    # print(e)
    last_reward = 0
    for time in range(MOVES):
        if (not TRAINING):
            # delay.sleep(1/50)
            env.render()
        else:
            env.render()
def generateAnimation(self, hdg0):
    """
    Generate an animation showing the two Q-values during an interesting control
    simulation including gusts.

    :param hdg0: Initial heading of the boat for the simulation
    """
    agent = DQNAgent(self.mdp.size, self.action_size)
    agent.load(self.src)
    WH = self.wh.generateWind()
    hdg0 = hdg0 * TORAD * np.ones(self.wh.samples)
    state = self.mdp.initializeMDP(hdg0, WH)
    i = np.ones(0)
    v = np.ones(0)
    NN_Q0 = np.zeros(self.sim_time)
    NN_Q1 = np.zeros(self.sim_time)
    wind_heading = np.ones(0)
    for timesim in range(self.sim_time):
        WH = self.wh.generateWind()
        if timesim == 50:
            WH = self.wh.generateGust(10 * TORAD)
        action = agent.actDeterministically(state)
        next_state, reward = self.mdp.transition(action, WH)
        state = next_state
        i = np.concatenate([i, self.mdp.extractSimulationData()[0, :]])
        v = np.concatenate([v, self.mdp.extractSimulationData()[1, :]])
        NN_Q0[timesim] = agent.evaluate(self.mdp.s)[0]
        NN_Q1[timesim] = agent.evaluate(self.mdp.s)[1]
        wind_heading = np.concatenate([wind_heading, WH[0:10]])
    time_vec = np.linspace(0, self.sim_time, int(self.sim_time / self.mdp.dt))

    # Visualization tools start here
    f = plt.figure(figsize=(15, 5))
    ax0 = f.add_subplot(2, 2, 1)
    ax1 = f.add_subplot(2, 2, 3)
    ax2 = f.add_subplot(2, 2, (2, 4))
    ax0.set_title('Simulation')
    ax0.set_ylabel('i [°]')
    ax0.set_xlabel('t [s]')
    ax0.grid(linestyle='-', linewidth=1)
    ax1.set_ylabel('v [m/s]')
    ax1.set_xlabel('t [s]')
    ax1.grid(linestyle='-', linewidth=1)
    ax2.set_xticks([0, 1])
    ax2.grid(linestyle='-', linewidth=1)
    ax2.set_xticklabels(['Bear-off', 'Luff'])
    ax2.set_ylim([14, 20])
    ax2.set_xlim([0, 1])
    ax2.set_title('Q(s,a) of actions')
    l0, = ax0.plot(time_vec, i)
    l1, = ax1.plot(time_vec, v)
    bar0 = ax2.bar([0, 1], [NN_Q0[0], NN_Q1[0]], color=['b', 'r'])

    def animate(k):
        l0.set_data(time_vec[:k], i[:k])
        time.sleep(.0025)
        l1.set_data(time_vec[:k], v[:k])
        if k % 10 == 0:
            kk = k // 10
            bar0[0].set_height(NN_Q0[kk])
            bar0[1].set_height(NN_Q1[kk])
        return l0, l1, bar0

    ani = animation.FuncAnimation(f, animate, frames=1000, interval=1, blit=False)
    plt.show()
    return ani
def generateDeltaAnimation(self, hdg0):
    """
    Generate an animation showing the differences between the two Q-values during
    an interesting control simulation including gusts.

    :param hdg0: Initial heading of the boat for the simulation
    """
    agent = DQNAgent(self.mdp.size, self.action_size)
    agent.load(self.src)
    WH = self.wh.generateWind()
    hdg0 = hdg0 * TORAD * np.ones(self.wh.samples)
    state = self.mdp.initializeMDP(hdg0, WH)
    i = np.ones(0)
    v = np.ones(0)
    NN_Q0 = np.zeros(self.sim_time)
    NN_Q1 = np.zeros(self.sim_time)
    wind_heading = np.ones(0)
    for timesim in range(self.sim_time):
        WH = self.wh.generateWind()
        if timesim == 50:
            WH = self.wh.generateGust(10 * TORAD)
        action = agent.actDeterministically(state)
        next_state, reward = self.mdp.transition(action, WH)
        state = next_state
        i = np.concatenate([i, self.mdp.extractSimulationData()[0, :]])
        v = np.concatenate([v, self.mdp.extractSimulationData()[1, :]])
        NN_Q0[timesim] = agent.evaluate(self.mdp.s)[0]
        NN_Q1[timesim] = agent.evaluate(self.mdp.s)[1]
        wind_heading = np.concatenate([wind_heading, WH[0:10]])
    time_vec = np.linspace(0, self.sim_time, int(self.sim_time / self.mdp.dt))

    # Visualization tools start here
    f = plt.figure(figsize=(15, 5))
    ax0 = f.add_subplot(2, 2, 1)
    ax1 = f.add_subplot(2, 2, 3)
    ax2 = f.add_subplot(2, 2, (2, 4))
    ax0.set_title('Simulation')
    ax0.set_ylabel('i [°]')
    ax0.grid(linestyle='-', linewidth=1)
    ax1.set_ylabel('v [m/s]')
    ax1.set_xlabel('t [s]')
    ax1.grid(linestyle='-', linewidth=1)
    ax3 = ax2.twiny()
    tresh = np.max(NN_Q0 - NN_Q1)
    ax3.plot(np.linspace(-tresh, tresh, 100), 0.5 * np.ones(100))  # Create a dummy plot
    ax3.cla()
    ax2.set_xticks([-1, 1])
    ax2.grid(linestyle='-', linewidth=1)
    ax2.set_xticklabels(['Bear off', 'Luff'])
    ax2.set_ylim([-.5, .5])
    ax2.set_xlim([-1, 1])
    ax2.set_title('Q(s,bear-off) - Q(s,luff)', y=-0.1)
    ax2.get_yaxis().set_visible(False)
    ax3.get_yaxis().set_visible(False)
    l0, = ax0.plot(time_vec, i / TORAD)
    l1, = ax1.plot(time_vec, v)
    bar0 = ax2.barh(0, NN_Q1[0] - NN_Q0[0])
    ax2.plot([0, 0], [-0.5, 0.5], color='gray')

    def animate(k):
        l0.set_data(time_vec[:k], i[:k] / TORAD)
        time.sleep(.0025)
        l1.set_data(time_vec[:k], v[:k])
        if k % 10 == 0:
            kk = k // 10
            bar0[0].set_width(NN_Q1[kk] - NN_Q0[kk])
        return l0, l1, bar0

    ani = animation.FuncAnimation(f, animate, frames=1000, interval=1, blit=False)
    plt.show()
    return ani