def step(self):
    """Advance the environment by one action and return the transition.

    Increments ``self.actnum``, rebuilds ``self.state`` as
    ``[dist, t, hdg_rel / 180.]`` from the current traffic data, checks
    whether the episode is over, and computes the reward through
    ``self.gen_reward()``.

    Returns:
        tuple: ``(self.state, reward, self.done, prev_state)`` where
        ``prev_state`` is the value ``self.state`` had on entry.
    """
    self.actnum += 1
    prev_state = self.state

    # Update state: bearing/distance pair as returned by qdrdist, measured
    # from the aircraft position to the second-to-last route waypoint.
    route = traf.ap.route[self.acidx]
    qdr, dist = qdrdist(
        traf.lat[self.acidx],
        traf.lon[self.acidx],
        route.wplat[-2],
        route.wplon[-2],
    )
    time_error = agent.sta - ETA(agent.acidx)
    print('STA {}, ETA {}, t {}'.format(agent.sta, ETA(agent.acidx), time_error))
    hdg_rel = degto180(qdr - traf.hdg[agent.acidx])
    # The state holds three features; a disabled earlier variant also
    # appended traf.tas[agent.acidx].
    self.state = np.array([dist, time_error, hdg_rel / 180.])

    # Check episode termination
    # NOTE(review): block nesting below was reconstructed from
    # whitespace-collapsed source -- confirm against the original layout.
    if dist < 1 or agent.sta - sim.simt < -60:
        if agent.epsilon > agent.epsilon_min:
            agent.epsilon -= 0.9 / 1000.
        self.done = True
        # NOTE(review): reset happens before the reward is generated and
        # before self.state is returned -- confirm this ordering is intended.
        env.reset()

    reward = self.gen_reward()
    print('State {}'.format(self.state))
    print('Reward {}, epsilon {}'.format(reward, agent.epsilon))

    if train_phase:
        self.log()

    return self.state, reward, self.done, prev_state
def test():
    """Run one evaluation pass over the pending events.

    For every event, a fresh STA is drawn at the start of an episode
    (``env.actnum == 0``), the environment is stepped, and the agent picks
    its next action via ``agent.act_test`` unless the episode finished.

    NOTE: a second ``test`` defined later in this file rebinds the name, so
    this version is not the one reached through the module-level ``test``.
    """
    eventmanager.update()
    for _event in eventmanager.events:
        if env.actnum == 0:
            # Start of an episode: STA is the current ETA plus a random
            # offset in [0, 100).
            agent.sta = ETA(agent.acidx) + random.random() * 100

        observation, _reward, finished, _previous = env.step()
        if finished:
            continue
        agent.act_test(observation)
def train():
    """Run one training pass over the pending events.

    For every event, a fresh STA is drawn at the start of an episode
    (``env.actnum == 0``), the environment is stepped, the transition is
    stored in the agent's memory, a replay batch is trained once enough
    samples are available, and the agent chooses its next action unless the
    episode finished.
    """
    eventmanager.update()
    for _event in eventmanager.events:
        if env.actnum == 0:
            # Start of an episode: STA is the current ETA plus a random
            # offset in [0, 100).
            agent.sta = ETA(agent.acidx) + random.random() * 100

        observation, reward, finished, previous = env.step()
        # Shape the new state as a single-row batch of agent.state_size
        # columns.
        observation = np.reshape(observation, [1, agent.state_size])

        # NOTE(review): nesting reconstructed from whitespace-collapsed
        # source -- confirm whether the replay check belongs inside the
        # actnum check.
        if env.actnum > 0:
            agent.remember(previous, agent.action, reward, observation, finished)
        if len(agent.memory) > agent.batch_size:
            agent.replay(agent.batch_size)

        if not finished:
            agent.act(observation)
def test():
    """Run one evaluation pass over the pending events and log each step.

    For every event, a fresh STA is drawn at the start of an episode
    (``env.actnum == 0``), the environment is stepped, and the agent picks
    its next action via ``agent.act_test`` unless the episode finished.
    One CSV row is then appended to ``agent.testname`` with the columns:
    episode, action number, reward, the three state components, STA,
    simulation time, latitude, longitude. The simulation is stopped once
    ``env.ep`` exceeds 25.
    """
    eventmanager.update()
    # NOTE(review): block nesting below was reconstructed from
    # whitespace-collapsed source -- confirm that logging and the stop check
    # run once per event.
    for _event in eventmanager.events:
        if env.actnum == 0:
            # Start of an episode: STA is the current ETA plus a random
            # offset in [0, 100).
            agent.sta = ETA(agent.acidx) + random.random() * 100

        next_state, reward, done, prev_state = env.step()
        if not done:
            agent.act_test(next_state)

        row = "{},{},{},{},{},{},{},{},{},{}\n".format(
            env.ep, env.actnum, env.reward,
            env.state[0], env.state[1], env.state[2],
            agent.sta, sim.simt,
            traf.lat[env.acidx], traf.lon[env.acidx])
        # Context manager guarantees the log file is closed even if the
        # write fails.
        with open(agent.testname, 'a') as f:
            f.write(row)

        if env.ep > 25:
            sim.stop()