class MobileAvoidance(EnvSpace):

    def env_init(self):
        self.env = CarEnv()
        self.state = self.env.reset()
        self.send_state_get_action(self.state)   # request the first action from the server
        self.var = 1                             # initial std of the exploration noise

    def on_predict_response(self, action):
        # decay the exploration noise once the replay memory is full
        if self.ep_use_step > cfg['DDPG']['memory_capacity']:
            self.var *= 0.9995
        # add Gaussian exploration noise and clip to the valid action range
        a = np.clip(np.random.normal(action, self.var), *self.env.action_bound)
        next_state, reward, done, _ = self.env.step(a)
        done = done or self.ep_use_step >= EP_MAXSTEP   # force episode end at the step limit
        # send back the transition that was actually executed for training
        self.send_train_get_action(self.state, a, reward, done, next_state)
        self.state = next_state
        if self.ep >= 30 and RENDER:
            self.env.render()
        if done:
            self.state = self.env.reset()
            self.send_state_get_action(self.state)
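The class above only assumes that CarEnv exposes a reset()/step()/render() interface plus an action_bound attribute used for clipping. As a reading aid, here is a minimal stand-in for that assumed contract; DummyCarEnv, its observation size, and its bounds are hypothetical and not part of the original project.

import numpy as np

class DummyCarEnv:
    action_bound = (-1.0, 1.0)                  # [low, high] used to clip noisy actions

    def reset(self):
        return np.zeros(4, dtype=np.float32)    # hypothetical 4-dim observation

    def step(self, action):
        next_state = np.random.randn(4).astype(np.float32)
        reward, done, info = 0.0, False, {}
        return next_state, reward, done, info

    def render(self):
        pass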
UPDATE_EVENT.clear()            # no update at the start
ROLLING_EVENT.set()             # workers may start to roll out
workers = [Worker(wid=i) for i in range(N_WORKER)]

GLOBAL_UPDATE_COUNTER, GLOBAL_EP = 0, 0
GLOBAL_RUNNING_R = []
COORD = tf.train.Coordinator()
QUEUE = queue.Queue()           # workers put collected transitions here

threads = []
for worker in workers:          # one rollout thread per worker
    t = threading.Thread(target=worker.work, args=())
    t.start()
    threads.append(t)
# add a PPO updating thread
threads.append(threading.Thread(target=GLOBAL_PPO.update))
threads[-1].start()
COORD.join(threads)

# plot the moving reward over episodes
plt.plot(np.arange(len(GLOBAL_RUNNING_R)), GLOBAL_RUNNING_R)
plt.xlabel('Episode')
plt.ylabel('Moving reward')
plt.ion()
plt.show()

# test the trained policy
while True:
    s = env.reset()
    for t in range(400):
        env.render()
        s, _, done, _ = env.step(GLOBAL_PPO.choose_action(s))
        if done:
            break
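The main section above coordinates N_WORKER rollout threads and a single PPO updating thread through two threading.Event objects and a Queue: workers collect data while ROLLING_EVENT is set, then pause while the updater drains the queue under UPDATE_EVENT. The self-contained toy below mirrors only that handshake; toy_worker, toy_updater, BATCH, and ROUNDS are made-up names, not the project's actual Worker.work() or GLOBAL_PPO.update().

import queue
import threading

BATCH, ROUNDS, N_TOY_WORKERS = 4, 3, 2
update_event, rolling_event, stop_event = threading.Event(), threading.Event(), threading.Event()
data_queue = queue.Queue()

def toy_worker(wid):
    step = 0
    while not stop_event.is_set():
        if not rolling_event.wait(timeout=0.1):   # re-check the stop flag periodically
            continue
        data_queue.put((wid, step))               # stand-in for one rollout transition
        step += 1
        if data_queue.qsize() >= BATCH:           # enough data for one update
            rolling_event.clear()                 # pause every worker
            update_event.set()                    # wake the updater

def toy_updater():
    for _ in range(ROUNDS):
        update_event.wait()                       # wait until a batch is ready
        batch = [data_queue.get() for _ in range(data_queue.qsize())]
        print('update on %d samples' % len(batch))
        update_event.clear()                      # finished updating
        rolling_event.set()                       # let the workers roll out again
    stop_event.set()                              # tell the workers to quit
    rolling_event.set()                           # release any worker still waiting

update_event.clear()
rolling_event.set()
threads = [threading.Thread(target=toy_worker, args=(i,)) for i in range(N_TOY_WORKERS)]
threads.append(threading.Thread(target=toy_updater))
for t in threads:
    t.start()
for t in threads:
    t.join()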