예제 #1
0
파일: agent.py 프로젝트: romainducrocq/uge
    def training(self, episodes):
        """Run the tabular Q-learning training loop for the ego vehicle.

        Each iteration: read the current observation, discretize it via
        ``self.framing``, pick an action epsilon-greedily, apply the resulting
        speed to the ego car, advance the simulation one step, compute the
        reward, and apply the standard Q-learning update to ``self.q``.
        Per-step statistics are accumulated and plotted once the step budget
        ``episodes * 10000`` is reached.

        Args:
            episodes: scale factor for the training length; the plot is
                produced when ``steps`` reaches ``episodes * 10000``.

        NOTE(review): the loop never breaks — after the budget is reached it
        keeps running, throttled by the ``time.sleep(.1)`` below. Confirm
        this "watch mode" is intentional.
        """
        # Disable the simulator's built-in speed control so the agent has
        # full authority over the ego car's speed.
        self.env.set_speed_mode(self.env.my_car_id, 0)
        state = None   # last usable observation dict; None until the env yields one
        steps = 0      # total simulation steps taken with a valid state

        # Reward-shaping variant in use; alternatives kept for reference.
        # reward_type = "collision"
        # reward_type = "horizon"
        reward_type = "security_distance"
        speed_limit = True

        # Statistics accumulated for plotting at the end of the step budget.
        plt_data = {
            "collisions": [],      # step indices at which a collision occurred
            "space_headway": [],   # raw headway readings from the env
            "relative_speed": [],  # converted below via * 3.6 (m/s -> km/h, presumably)
            "speed": [],           # converted below via * 3.6 (m/s -> km/h, presumably)
            "steps": 0
        }

        while True:
            print(state)
            if state:
                # Record raw observations; the * 3.6 factor suggests the env
                # reports speeds in m/s — TODO confirm against the env.
                plt_data["space_headway"].append(state.get("space_headway"))
                plt_data["relative_speed"].append(
                    round(state.get("relative_speed") * 3.6, 0))
                plt_data["speed"].append(round(state.get("speed") * 3.6, 0))

                # Discretize the continuous observation into the table-index
                # keys used by the Q-table dictionaries.
                d_t, ds_t, s_t = \
                    self.framing(state.get('space_headway'), self.i_dict_space_headway), \
                    self.framing(state.get('relative_speed'), self.i_dict_relative_speed), \
                    self.framing(state.get('speed'), self.i_dict_speed)

                # Epsilon-greedy action selection over the discretized state.
                a = self.e_greedy_policy(d_t, ds_t, s_t)

                # Current Q-value Q(s_t, a_t), cached before the env advances.
                q_t = self.q[self.i_dict_space_headway.get(d_t),
                             self.i_dict_relative_speed.get(ds_t),
                             self.i_dict_speed.get(s_t),
                             self.i_dict_action.get(self.action[a])]

                # Apply the chosen action as a new target speed, then advance
                # the simulation one tick and read the resulting observation.
                new_speed = self.new_speed(self.action[a], state.get('speed'))
                self.env.set_speed(self.env.my_car_id, new_speed)
                self.env.simulation_step()
                next_state = self.env.get_state(self.env.my_car_id)

                # Sentinel: the Q-update below only fires when q_max_t1 is
                # set, i.e. on collision (terminal) or on a valid next state.
                q_max_t1 = None
                if self.env.is_collided(self.env.my_car_id):
                    # Terminal transition: collision penalty, stop the car,
                    # zero bootstrap value, and reset the episode state.
                    self.set_reward_collision(reward_type)
                    self.env.set_speed(self.env.my_car_id, 0)
                    q_max_t1 = 0
                    state = None
                    plt_data["collisions"].append(steps)

                elif next_state:
                    """REWARD"""
                    """
                    if reward_type == "horizon":
                        self.set_reward_horizon_speed(next_state.get('space_headway'), next_state.get('speed'), speed_limit)
                    """

                    # Reward for the transition, stored on self.reward as a
                    # side effect of the set_reward_* call.
                    if reward_type == "security_distance":
                        self.set_reward_security_dist_speed(
                            next_state.get('space_headway'),
                            next_state.get('speed'), speed_limit)

                    print(f"reward {self.reward}")

                    # Discretize the successor state for the bootstrap term.
                    d_t1, ds_t1, s_t1 = \
                        self.framing(next_state.get('space_headway'), self.i_dict_space_headway), \
                        self.framing(next_state.get('relative_speed'), self.i_dict_relative_speed), \
                        self.framing(next_state.get('speed'), self.i_dict_speed)

                    # max_a' Q(s_{t+1}, a') over the action axis of the table.
                    q_max_t1 = np.max(
                        self.q[self.i_dict_space_headway.get(d_t1),
                               self.i_dict_relative_speed.get(ds_t1),
                               self.i_dict_speed.get(s_t1)])

                    state = next_state

                # Q-learning update:
                # Q(s,a) <- (1-alpha)*Q(s,a) + alpha*(r + gamma*max_a' Q(s',a'))
                # Skipped (q_max_t1 is None) when the env returned neither a
                # collision nor a next state for the ego car.
                if q_max_t1 is not None:
                    self.q[
                        self.i_dict_space_headway.get(d_t),
                        self.i_dict_relative_speed.get(ds_t),
                        self.i_dict_speed.get(s_t),
                        self.i_dict_action.get(self.action[a])] = \
                        (1 - self.alpha) * q_t + self.alpha * (self.reward + self.gamma * q_max_t1)
                    """ PRINT Q"""
                    print(
                        f"q: {self.q[self.i_dict_space_headway.get(d_t), self.i_dict_relative_speed.get(ds_t), self.i_dict_speed.get(s_t)]}"
                    )

                steps += 1
                self.epsilon_decay(steps)  # anneal exploration as training progresses
                # print(steps)
                # print(f"time: {self.env.get_current_time()}")
            else:
                # No usable state yet (startup, or just after a collision):
                # tick the simulation with the ego car held at speed 0 until
                # the env yields an observation again.
                self.env.simulation_step()
                state = self.env.get_state(self.env.my_car_id)
                self.env.set_speed(self.env.my_car_id, 0)

            # Past the training budget: throttle the loop so the simulation
            # can be watched in (roughly) real time.
            if steps > (episodes * 10000):
                time.sleep(.1)

            # Exactly at the budget: freeze the step count and plot the
            # collected statistics (fires once, since steps keeps growing).
            if steps == episodes * 10000:
                plt_data["steps"] = steps
                plotting = Plotting(self, plt_data)
                plotting.plot_()