Example #1
0
    def step(self):
        """Advance the environment by one action step.

        Rebuilds the observation from the simulator globals, checks for
        episode termination, and returns the RL tuple
        (state, reward, done, prev_state).
        """
        self.actnum += 1
        old_state = self.state

        # Refresh the observation: bearing and distance from the aircraft
        # to the second-to-last waypoint of its route.
        bearing, distance = qdrdist(traf.lat[self.acidx], traf.lon[self.acidx],
                                    traf.ap.route[self.acidx].wplat[-2],
                                    traf.ap.route[self.acidx].wplon[-2])
        # Timing error: scheduled minus estimated time of arrival.
        time_err = agent.sta - ETA(agent.acidx)
        print('STA {}, ETA {}, t {}'.format(agent.sta, ETA(agent.acidx),
                                            time_err))
        # Heading relative to the bearing, wrapped into [-180, 180] and
        # scaled to [-1, 1] for the network input.
        rel_heading = degto180(bearing - traf.hdg[agent.acidx])
        self.state = np.array([distance, time_err, rel_heading / 180.])

        # Episode ends when the waypoint is reached (<1 unit away) or the
        # aircraft is more than 60 s late versus its scheduled arrival.
        if distance < 1 or agent.sta - sim.simt < -60:
            # Anneal exploration once per finished episode.
            if agent.epsilon > agent.epsilon_min:
                agent.epsilon -= 0.9 / 1000.
            self.done = True
            env.reset()

        step_reward = self.gen_reward()
        print('State {}'.format(self.state))
        print('Reward {}, epsilon {}'.format(step_reward, agent.epsilon))

        if train_phase:
            self.log()

        return self.state, step_reward, self.done, old_state
Example #2
0
def test():
    """Run one evaluation-mode interaction for every pending event."""
    eventmanager.update()

    for _event in eventmanager.events:
        # First action of an episode: draw a random scheduled time of
        # arrival up to 100 s after the current estimated arrival.
        if env.actnum == 0:
            agent.sta = ETA(agent.acidx) + random.random() * 100

        next_state, reward, done, prev_state = env.step()

        # Keep acting greedily until the episode terminates.
        if not done:
            agent.act_test(next_state)
Example #3
0
def train():
    """Run one training interaction per pending event and learn from it."""
    eventmanager.update()

    for _event in eventmanager.events:
        # New episode: randomise the scheduled time of arrival up to
        # 100 s after the current estimated arrival.
        if env.actnum == 0:
            agent.sta = ETA(agent.acidx) + random.random() * 100
        next_state, reward, done, prev_state = env.step()

        next_state = np.reshape(next_state, [1, agent.state_size])
        # Store the transition once a previous state exists to pair with.
        if env.actnum > 0:
            agent.remember(prev_state, agent.action, reward, next_state, done)
        # Learn from a minibatch as soon as enough experience is banked.
        if len(agent.memory) > agent.batch_size:
            agent.replay(agent.batch_size)
        if not done:
            agent.act(next_state)
Example #4
0
def test():
    """Run one evaluation interaction per event and append a CSV log row.

    For each pending event: (re)draw the scheduled time of arrival at the
    start of an episode, step the environment, act greedily if the episode
    is still running, and log the step to ``agent.testname``. Stops the
    simulation after 25 evaluation episodes.
    """
    eventmanager.update()

    for i in eventmanager.events:
        if env.actnum == 0:
            # New episode: randomise the scheduled time of arrival up to
            # 100 s after the current estimated arrival.
            agent.sta = ETA(agent.acidx) + random.random() * 100
        next_state, reward, done, prev_state = env.step()

        if not done:
            agent.act_test(next_state)

        # Append one CSV row per step. Using a context manager guarantees
        # the handle is closed even if the write raises (the original
        # open/write/close sequence leaked the handle on error).
        with open(agent.testname, 'a') as f:
            f.write("{},{},{},{},{},{},{},{},{},{}\n".format(env.ep, env.actnum, env.reward, env.state[0], env.state[1], env.state[2], agent.sta, sim.simt, traf.lat[env.acidx], traf.lon[env.acidx]))

        # Halt the simulator once enough evaluation episodes have run.
        if env.ep > 25:
            sim.stop()