def playActor(self):
    # Assumes numpy as np, TORAD, wind and DISPLAYER are imported at module level.
    self.load("NetworkParam/FinalParam")
    hdg0_rand_vec = [0, 7, 12]

    ''' WIND CONDITIONS '''
    mean = 45 * TORAD
    std = 0.1 * TORAD
    wind_samples = 10
    w = wind(mean=mean, std=std, samples=wind_samples)

    try:
        for i in range(len(hdg0_rand_vec)):
            # Initial state
            WH = w.generateWind()
            hdg0_rand = hdg0_rand_vec[i]
            hdg0 = hdg0_rand * TORAD * np.ones(10)

            s = self.env.reset(hdg0, WH)
            episode_reward = 0
            episode_step = 0
            v_episode = []
            i_episode = []

            while episode_step < 40:  # not done:
                if episode_step == 0:
                    i_episode.append(hdg0_rand + WH[0] / TORAD - 40)
                else:
                    i_episode.append(s[0][-1] / TORAD)
                s = np.reshape([s[0, :], s[1, :]], [self.state_size, 1])

                # Predict the deterministic action and clip it to the bounds
                a, = self.sess.run(self.network.actions,
                                   feed_dict={self.network.state_ph: s[None]})
                a = np.clip(a, self.low_bound, self.high_bound)

                s_, r = self.env.act(a, WH)
                episode_reward += r
                v_episode.append(r)
                episode_step += 1
                s = s_

            DISPLAYER.displayVI(v_episode, i_episode, i)
            print("Episode reward :", episode_reward, " for incidence: ", hdg0_rand)

    except KeyboardInterrupt as e:
        pass

    except Exception as e:
        print("Exception :", e)

    finally:
        print("End of the demo")
def play(self, sess, number_run, path=''):
    print("Playing", self.name, "for", number_run, "runs")

    with sess.as_default(), sess.graph.as_default():
        hdg0_rand_vec = [0, 7, 13]

        ''' WIND CONDITIONS '''
        mean = 45 * TORAD
        std = 0 * TORAD
        wind_samples = 10
        w = wind(mean=mean, std=std, samples=wind_samples)

        try:
            for i in range(number_run):
                # Reset the local network to the global one
                if self.name != 'global':
                    sess.run(self.update_local_vars)

                WH = w.generateWind()
                # Cycle through the initial headings so number_run may
                # exceed len(hdg0_rand_vec) without an IndexError
                hdg0_rand = hdg0_rand_vec[i % len(hdg0_rand_vec)]
                hdg0 = hdg0_rand * TORAD * np.ones(10)

                s = self.env.reset(hdg0, WH)
                episode_reward = 0
                episode_step = 0
                v_episode = []
                i_episode = []
                done = False
                # self.lstm_state = self.network.lstm_state_init

                while not done and episode_step < 70:
                    i_episode.append(round(s[0][-1] / TORAD))
                    s = np.reshape([s[0, :], s[1, :]], [2 * self.state_size, 1])

                    # Prediction of the policy and the value
                    feed_dict = {self.network.inputs: [s]}
                    policy, value = sess.run(
                        [self.network.policy, self.network.value],
                        feed_dict=feed_dict)
                    policy = policy[0]

                    # Choose an action according to the policy
                    action = np.random.choice([1.5, 0, -1.5], p=policy)
                    s_, r = self.env.act(action, WH)

                    # Early termination is disabled (done is never set True);
                    # crossing the velocity threshold only prints a diagnostic
                    if episode_step > 12:
                        if np.mean(v_episode[-4:]) > 0.8:
                            # done = True
                            print("Done!")
                        else:
                            done = False

                    episode_reward += r
                    v_episode.append(r)
                    episode_step += 1
                    s = s_

                DISPLAYER.displayVI(v_episode, i_episode, i)
                print("Episode reward :", episode_reward)

        except KeyboardInterrupt as e:
            pass

        finally:
            print("End of the demo")
def playCritic(self):
    self.load("NetworkParam/FinalParam")
    hdg0_rand_vec = [0, 7, 12]

    ''' WIND CONDITIONS '''
    mean = 45 * TORAD
    std = 0.1 * TORAD
    wind_samples = 10
    w = wind(mean=mean, std=std, samples=wind_samples)

    # Discrete rudder actions over which the critic is evaluated
    action_space = np.linspace(-1.5, 1.5, 13)

    try:
        for i in range(len(hdg0_rand_vec)):
            # Initial state
            WH = w.generateWind()
            hdg0_rand = hdg0_rand_vec[i]
            hdg0 = hdg0_rand * TORAD * np.ones(10)

            s = self.env.reset(hdg0, WH)
            episode_reward = 0
            episode_step = 0
            v_episode = []
            i_episode = []

            while episode_step < 30:  # not done:
                if episode_step == 0:
                    i_episode.append(hdg0_rand + WH[0] / TORAD - 40)
                else:
                    i_episode.append(s[0][-1] / TORAD)

                # Critic policy: evaluate Q(s, a) for every candidate action
                # and act greedily
                critic = [self.evaluate(s, a) for a in action_space]
                a = action_space[np.argmax(critic)]

                s_, r = self.env.act(a, WH)
                episode_reward += r
                v_episode.append(r)
                episode_step += 1
                s = s_

            DISPLAYER.displayVI(v_episode, i_episode, i + 3)
            print("Episode reward :", episode_reward, " for incidence: ", hdg0_rand)

    except KeyboardInterrupt as e:
        pass

    except Exception as e:
        print("Exception :", e)

    finally:
        print("End of the demo")
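# For orientation, a minimal sketch of how these three demo methods might be
# driven from a script. The DDPGAgent and Worker class names, their
# constructors, and the session wiring are assumptions for illustration; only
# playActor(), playCritic() and play(sess, number_run) come from the code above.
#
# import tensorflow as tf
#
# if __name__ == '__main__':
#     # The agent is assumed to own self.env, self.sess, self.network and
#     # self.evaluate(), as used inside playActor()/playCritic().
#     agent = DDPGAgent()
#     agent.playActor()    # deterministic actor roll-outs per initial heading
#     agent.playCritic()   # greedy roll-outs over the 13-action critic grid
#
#     # A3C-style demo: play() expects an explicit session and a run count,
#     # one run per entry of hdg0_rand_vec.
#     worker = Worker(name='global')
#     with tf.Session() as sess:
#         sess.run(tf.global_variables_initializer())
#         worker.play(sess, number_run=3)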