Example #1
import numpy as np


class MobileAvoidance(EnvSpace):
    def env_init(self):
        self.env = CarEnv()
        self.state = self.env.reset()
        self.send_state_get_action(self.state)

        # Exploration noise scale (std of the Gaussian added to actions).
        self.var = 1

    def on_predict_response(self, action):
        # Decay the exploration noise once enough steps have been collected.
        if self.ep_use_step > cfg['DDPG']['memory_capacity']:
            self.var *= 0.9995
        # Add Gaussian exploration noise and clip to the valid action range.
        a = np.clip(np.random.normal(action, self.var), *self.env.action_bound)
        next_state, reward, done, _ = self.env.step(a)
        # Force episode termination when the step limit is reached.
        done = True if self.ep_use_step >= EP_MAXSTEP else done
        self.send_train_get_action(self.state, a, reward, done, next_state)
        self.state = next_state

        if self.ep >= 30 and RENDER:
            self.env.render()
        if done:
            self.state = self.env.reset()
            self.send_state_get_action(self.state)
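
Example #1 drives a CarEnv through the EnvSpace callback API (send_state_get_action / on_predict_response), so the usual act-step-store loop is split across callbacks. For orientation, a minimal standalone sketch of the same DDPG-style exploration pattern in a plain synchronous loop is shown below; the agent object, its choose_action/store_transition/learn methods, and the constants are illustrative assumptions, not part of the framework above.

import numpy as np

# Illustrative constants, analogous to cfg['DDPG']['memory_capacity'] and EP_MAXSTEP above.
MEMORY_CAPACITY = 10000
EP_MAXSTEP = 200


def run_episode(env, agent, var):
    """One episode of DDPG-style interaction with decaying Gaussian exploration."""
    state = env.reset()
    for _ in range(EP_MAXSTEP):
        action = agent.choose_action(state)  # deterministic policy output
        # Add Gaussian exploration noise and clip to the valid action range.
        action = np.clip(np.random.normal(action, var), *env.action_bound)
        next_state, reward, done, _ = env.step(action)
        agent.store_transition(state, action, reward, next_state)
        if agent.pointer > MEMORY_CAPACITY:  # learn once the replay buffer is full
            var *= 0.9995                    # slowly reduce exploration noise
            agent.learn()
        state = next_state
        if done:
            break
    return var
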
Example #2
    # Assumed module-level imports: threading, queue, numpy as np,
    # tensorflow as tf, matplotlib.pyplot as plt.
    UPDATE_EVENT.clear()   # no update right now
    ROLLING_EVENT.set()    # allow the workers to start rolling out
    workers = [Worker(wid=i) for i in range(N_WORKER)]

    GLOBAL_UPDATE_COUNTER, GLOBAL_EP = 0, 0
    GLOBAL_RUNNING_R = []
    COORD = tf.train.Coordinator()
    QUEUE = queue.Queue()

    # start one data-collection thread per worker
    threads = []
    for worker in workers:
        t = threading.Thread(target=worker.work)
        t.start()
        threads.append(t)
    # add a PPO updating thread
    threads.append(threading.Thread(target=GLOBAL_PPO.update))
    threads[-1].start()
    COORD.join(threads)

    # plot the moving reward, then test the trained policy
    plt.plot(np.arange(len(GLOBAL_RUNNING_R)), GLOBAL_RUNNING_R)
    plt.xlabel('Episode')
    plt.ylabel('Moving reward')
    plt.ion()
    plt.show()
    while True:
        s = env.reset()
        for t in range(400):
            env.render()
            s, _, done, _ = env.step(GLOBAL_PPO.choose_action(s))
            if done:
                break
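
Worker.work and GLOBAL_PPO.update are referenced above but not shown. A minimal sketch of how the two events could coordinate a rollout thread and the update thread in this kind of distributed PPO setup is given below (a single worker for simplicity; the function names, the update_with call, and the batch handling are assumptions for illustration, not the actual implementation).

import queue
import threading

UPDATE_EVENT, ROLLING_EVENT = threading.Event(), threading.Event()
QUEUE = queue.Queue()


def worker_loop(env, ppo, batch_size):
    """Collect transitions while rolling is allowed, then hand a batch to the updater."""
    while True:
        ROLLING_EVENT.wait()                 # blocked while the updater is running
        s = env.reset()
        batch = []
        for _ in range(batch_size):
            a = ppo.choose_action(s)
            s_, r, done, _ = env.step(a)
            batch.append((s, a, r))
            s = env.reset() if done else s_
        QUEUE.put(batch)                     # ship the batch to the update thread
        ROLLING_EVENT.clear()                # pause collection
        UPDATE_EVENT.set()                   # wake the updater


def update_loop(ppo):
    """Wait for collected data, update the shared PPO, then release the worker."""
    while True:
        UPDATE_EVENT.wait()
        batch = QUEUE.get()
        ppo.update_with(batch)               # hypothetical update call on the shared PPO
        UPDATE_EVENT.clear()
        ROLLING_EVENT.set()                  # let the worker roll out again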