def training(self, episodes):
    """Run the tabular Q-learning training loop against the driving environment.

    Repeatedly steps the simulation, picks accelerations epsilon-greedily from
    the Q-table ``self.q`` (indexed by discretized space headway, relative
    speed, ego speed, and action), and applies the standard Q-learning update.
    Statistics are collected in ``plt_data`` and handed to ``Plotting`` once
    ``episodes * 10000`` steps have been performed.

    NOTE(review): the ``while True`` loop never breaks — after the step budget
    is reached it keeps idling with ``time.sleep``; confirm this is intended.

    :param episodes: scales the training length; the target step count is
        ``episodes * 10000``.
    """
    # Disable the simulator's automatic speed control so the agent's speed
    # commands take effect. (Mode 0 presumably means "all checks off" —
    # TODO confirm against the environment/SUMO documentation.)
    self.env.set_speed_mode(self.env.my_car_id, 0)
    state = None
    steps = 0
    # reward_type = "collision"
    # reward_type = "horizon"
    reward_type = "security_distance"
    speed_limit = True
    # Per-step statistics collected for plotting at the end of training.
    plt_data = {
        "collisions": [],       # step indices at which a collision occurred
        "space_headway": [],
        "relative_speed": [],   # stored as value * 3.6 (m/s -> km/h, presumably), rounded
        "speed": [],            # stored as value * 3.6 (m/s -> km/h, presumably), rounded
        "steps": 0
    }
    while True:
        print(state)
        if state:
            # Log the current observation before acting on it.
            plt_data["space_headway"].append(state.get("space_headway"))
            plt_data["relative_speed"].append(
                round(state.get("relative_speed") * 3.6, 0))
            plt_data["speed"].append(round(state.get("speed") * 3.6, 0))
            # Discretize the continuous observation into Q-table bins.
            d_t, ds_t, s_t = \
                self.framing(state.get('space_headway'), self.i_dict_space_headway), \
                self.framing(state.get('relative_speed'), self.i_dict_relative_speed), \
                self.framing(state.get('speed'), self.i_dict_speed)
            # Epsilon-greedy action selection for the current state cell.
            a = self.e_greedy_policy(d_t, ds_t, s_t)
            # Q(s_t, a_t) before the update.
            q_t = self.q[self.i_dict_space_headway.get(d_t),
                         self.i_dict_relative_speed.get(ds_t),
                         self.i_dict_speed.get(s_t),
                         self.i_dict_action.get(self.action[a])]
            # Apply the chosen action and advance the simulation one tick.
            new_speed = self.new_speed(self.action[a], state.get('speed'))
            self.env.set_speed(self.env.my_car_id, new_speed)
            self.env.simulation_step()
            next_state = self.env.get_state(self.env.my_car_id)
            # q_max_t1 stays None when the successor state is unusable,
            # which skips the Q-update below.
            q_max_t1 = None
            if self.env.is_collided(self.env.my_car_id):
                # Terminal transition: collision penalty, stop the car, and
                # force a restart from a fresh state on the next iteration.
                self.set_reward_collision(reward_type)
                self.env.set_speed(self.env.my_car_id, 0)
                q_max_t1 = 0
                state = None
                plt_data["collisions"].append(steps)
            elif next_state:
                """REWARD"""
                """ if reward_type == "horizon": self.set_reward_horizon_speed(next_state.get('space_headway'), next_state.get('speed'), speed_limit) """
                # Compute the shaped reward for the observed successor state
                # (sets self.reward as a side effect).
                if reward_type == "security_distance":
                    self.set_reward_security_dist_speed(
                        next_state.get('space_headway'),
                        next_state.get('speed'), speed_limit)
                print(f"reward {self.reward}")
                # Discretize the successor state to evaluate max_a' Q(s', a').
                d_t1, ds_t1, s_t1 = \
                    self.framing(next_state.get('space_headway'), self.i_dict_space_headway), \
                    self.framing(next_state.get('relative_speed'), self.i_dict_relative_speed), \
                    self.framing(next_state.get('speed'), self.i_dict_speed)
                q_max_t1 = np.max(
                    self.q[self.i_dict_space_headway.get(d_t1),
                           self.i_dict_relative_speed.get(ds_t1),
                           self.i_dict_speed.get(s_t1)])
                state = next_state
            if q_max_t1 is not None:
                # Q-learning update:
                # Q(s,a) <- (1 - alpha) * Q(s,a) + alpha * (r + gamma * max_a' Q(s',a'))
                self.q[
                    self.i_dict_space_headway.get(d_t),
                    self.i_dict_relative_speed.get(ds_t),
                    self.i_dict_speed.get(s_t),
                    self.i_dict_action.get(self.action[a])] = \
                    (1 - self.alpha) * q_t + self.alpha * (self.reward + self.gamma * q_max_t1)
                """ PRINT Q"""
                print(
                    f"q: {self.q[self.i_dict_space_headway.get(d_t), self.i_dict_relative_speed.get(ds_t), self.i_dict_speed.get(s_t)]}"
                )
            steps += 1
            # Anneal exploration as training progresses.
            self.epsilon_decay(steps)
            # print(steps)
            # print(f"time: {self.env.get_current_time()}")
        else:
            # No usable state yet (start-up, or right after a collision):
            # keep the car stopped and step the simulation until the
            # environment yields a state again. Note steps is NOT
            # incremented here.
            self.env.simulation_step()
            state = self.env.get_state(self.env.my_car_id)
            self.env.set_speed(self.env.my_car_id, 0)
        # Past the step budget: throttle the (still running) loop.
        if steps > (episodes * 10000):
            time.sleep(.1)
        # Exactly at the budget: plot once. steps only passes this value a
        # single time, so Plotting runs at most once.
        if steps == episodes * 10000:
            plt_data["steps"] = steps
            plotting = Plotting(self, plt_data)
            plotting.plot_()