import os
from builtins import input
import matplotlib.pyplot as plt
from pyniel.pyplot_tools.graph_creator_gui import GraphCreatorGui

from pyIA.arguments import parse_sim_args
from pyIA.simulator import Environment

args = parse_sim_args()
env = Environment(args)
env.populate()

# plot map
worldstate = env.get_worldstate()
fig = plt.figure("map")
contours = worldstate.map.as_closed_obst_vertices()
worldstate.map.plot_contours(contours, '-k')
plt.axis('equal')

gcg = GraphCreatorGui(figure=fig, debug=args.debug)
# load graph if exists
filedir = os.path.expanduser(os.path.expandvars(args.map_folder))
filepath = os.path.join(filedir, args.map_name + "_graph.pickle")
if os.path.isfile(filepath):
    print("Previous graph for map found. Loading...")
    gcg.graph.restore_from_file(filepath)
gcg.run()

graph = gcg.graph

# sanity-check every scenario file in the scenario folder by loading and populating it
import pyIA.agents as agents
import pyIA.state_estimation as state_estimation
FILTER = ""  # (assumed default) only scenario names containing this substring are checked
# (assumed) list the scenario files found in the scenario folder passed via args
scenario_files = sorted(os.listdir(os.path.expanduser(args.scenario_folder)))
print("checking for {} scenarios in {}".format(len(scenario_files),
                                               args.scenario_folder))
print("------------------------------------------------------------")
for file in scenario_files:
    name, ext = os.path.splitext(file)
    if FILTER not in name:
        continue
    if ext == ".pickle":
        args.scenario = name
        if "asl_office_j" in name:
            args.map_name = "asl_office_j"
        elif "unity_scene_map" in name:
            args.map_name = "unity_scene_map"
        else:
            args.map_name = "asl"
        env = Environment(args, silent=True)
        try:
            env.populate(silent=True)
        except AttributeError as e:
            print("could not populate ", name, "(", e, ")")
            continue
        if not env.worldstate.get_agents():
            print(name, "has no agents!")
            continue
        robot_agent = env.worldstate.get_agents()[0]
        if not isinstance(robot_agent, agents.Robot):
            print(name, "does not have robot as agent 0!")
        agent_index = 0
        state_e, fixed_state = state_estimation.state_estimate_from_sim_worldstate(
            agent_index, env.worldstate)
        path_xy = fixed_state.derived_state.path_to_goal(
class IARLEnv(object):
    """ This one is tricky as it is used both in RL and non-RL pipelines
    It uses a rlenv to simulate lidars and odometry, and some IA tools to simulate agent behaviors 
    """
    def __init__(self, args, silent=False):
        # Load scenario
        self.args = args
        self.iaenv = Environment(args, silent=silent)
        self.iaenv.populate(silent=silent)
        self.n_sim_agents = len(self.iaenv.worldstate.get_agents())
        self.args.n_agents = self.n_sim_agents # tell rlenv to create an agent for every ia sim agent

        # variables
        self.exit = False  # flag to shutdown all threads if a single thread fails
        self.fixed_state = [None] # a container in order to be shared between envs
        self.sim_start_walltime = None
        self.lock = threading.Lock()
        self.is_in_gesture_episode = False
        self.is_in_speech_episode = False

        # Action and observation space

        # ROS or not?
        self.current_sim_time = 0.
        if args.no_ros:
            pass
        else:
            self.latest_cmd_vel = np.array([0,0,0])
            # Publishers
            self.goalpub = rospy.Publisher(kNavGoalTopic, PoseStamped, queue_size=1)
            self.obstpub = rospy.Publisher(kObstaclesTopic, ObstacleArrayMsg, queue_size=1)
            self.goalreachedpub = rospy.Publisher("/goal_reached", Header, queue_size=1)
            self.clockpub = rospy.Publisher("/clock", Clock, queue_size=1)

        # Sim Env
        self.rlenv = PepperRLEnv(
            args=args,
#             map_=self.iaenv.worldstate.map,  # don't downsample map. MAPS WILL DIFFER
            silent=silent,
        )

        # initialize agent velocities
        for i in range(len(self.rlenv.virtual_peppers)):
            if i == 0:
                continue
            vp = self.rlenv.virtual_peppers[i]
            vp.kMaxVel = np.array([0.8, 0.8, 1.]) # x y theta
            vp.kPrefVel = np.array([0.6, 0.6, 0.75]) # x y theta
            vp.kMaxAcc  = np.array([0.5, 0.5, 0.75]) # x y theta
            vp.kMaxJerk = np.array([1., 1., 2.]) # x y theta

        # Initialize agents according to scenario
        self.xystates0 = self._xystates()
        goalstates0 = np.array([inn.goal for inn in self.iaenv.worldstate.get_innerstates()])
        deltas = goalstates0 - self.xystates0
        thstates = np.arctan2(deltas[:,1], deltas[:,0])
        self.agents_pos0 = np.zeros((self.n_sim_agents, 3))
        self.agents_goals0 = np.zeros((self.n_sim_agents, 3))
        self.agents_pos0[:,:2] = self.xystates0 * 1.
        self.agents_pos0[:,2] = thstates * 1.
        self.agents_goals0[:,:2] = goalstates0 * 1.
        self.agents_goals0[:,2] = thstates * 1.
        # set radii in rlenv
        for i in range(self.n_sim_agents):
            self.rlenv.vp_radii[i] = self.iaenv.worldstate.get_agents()[i].radius

        # Reset environment
        self.reset()

        # spaces
        self.action_space = self.rlenv.action_space
        self.observation_space = self.rlenv.observation_space

        if self.args.realtime:
            self.realtime_rate = WalltimeRate(1. / self.args.dt)

        # wait 3 seconds for everyone to subscribe
        if self.args.delay_start:
            time.sleep(3.)

        # start ROS routines
        if not args.no_ros:
            # Subscribers
            from geometry_msgs.msg import Twist
            if self.args.cmd_vel:
                rospy.Subscriber(kCmdVelTopic, Twist, self.set_cmd_vel_callback, queue_size=1)
            from std_msgs.msg import String
            rospy.Subscriber("/gestures", String, self.gestures_callback, queue_size=1)
            rospy.Subscriber("/speech", String, self.speech_callback, queue_size=1)
            if PUBLISH_GOAL_EVERY != 0:
                rospy.Timer(rospy.Duration(PUBLISH_GOAL_EVERY), self.publish_goal_callback)
            else:
                self.publish_goal_callback()

        # dirty hack for move base
        if self.args.pre_step_once:
            # step environment
            robot_action = np.array([0, 0, 0])
            for i in range(100):
                ob, rew, new, _ = self.step(robot_action, ONLY_FOR_AGENT_0=True)
            self.reset()
            self.publish_goal_callback()

    # Timekeeping
    def reset_time(self):
        self.current_sim_time = 0.
        self.sim_start_walltime = None
    def increment_time(self):
        self.current_sim_time += self.args.dt
        if self.args.no_ros:
            pass
        else:
            cmsg = Clock()
            cmsg.clock = self.get_sim_time()
            self.clockpub.publish(cmsg)
        # store the moment the simulation was started
        if self.sim_start_walltime is None:
            self.sim_start_walltime = self.get_walltime()
    def get_walltime(self):
        if self.args.no_ros:
            return time.time()
        else:
            return rospy.Time.from_sec(time.time())
    def get_walltime_since_sim_start_sec(self):
        if self.sim_start_walltime is None:
            return 0
        if self.args.no_ros:
            return self.get_walltime() - self.sim_start_walltime
        else:
            return (self.get_walltime() - self.sim_start_walltime).to_sec()
    def get_sim_time(self):
        if self.args.no_ros:
            return self.current_sim_time
        else:
            return rospy.Time.from_sec(self.current_sim_time)
    def get_sim_time_sec(self):
        return self.current_sim_time
    def get_max_task_alotted_time(self):
        if self.args.no_ros:
            return MAX_TASK_ALOTTED_TIME
        else:
            return rospy.Duration(MAX_TASK_ALOTTED_TIME)
    def sleep_for_simtime(self, duration):
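        # busy-wait in small wall-clock slices until the simulation clock has advanced by `duration`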
        EXPECTED_SIM_SPEED_FACTOR = 10.  # if the sim is much faster, needs to be increased
        start = self.get_sim_time_sec()
        while self.get_sim_time_sec() < (start + duration):
            time.sleep(duration / EXPECTED_SIM_SPEED_FACTOR)

    # ROS publishing methods
    def publish_goal(self, goal):
        if self.args.no_ros or self.args.no_goal:
            return
        else:
            from tf.transformations import quaternion_from_euler
            from geometry_msgs.msg import Quaternion
            from geometry_msgs.msg import PoseStamped
            print("Publishing goal")
            posemsg = PoseStamped()
            posemsg.header.frame_id = kFixedFrame
            posemsg.pose.position.x = goal[0]
            posemsg.pose.position.y = goal[1]
            quaternion = quaternion_from_euler(0, 0, goal[2])
            posemsg.pose.orientation = Quaternion(*quaternion)
            self.goalpub.publish(posemsg)
    def publish_obstacle_msg(self):
        if self.args.no_ros:
            return
        else:
            from tf.transformations import quaternion_from_euler
            from geometry_msgs.msg import Point32, Quaternion
            from costmap_converter.msg import ObstacleArrayMsg, ObstacleMsg
            obstacles_msg = ObstacleArrayMsg()
            obstacles_msg.header.stamp = self.get_sim_time()
            obstacles_msg.header.frame_id = kFixedFrame
            for i in range(1, self.n_sim_agents):
                # Add point obstacle
                obst = ObstacleMsg()
                obst.id = i
                obst.polygon.points = [Point32()]
                obst.polygon.points[0].x = self.rlenv.virtual_peppers[i].pos[0]
                obst.polygon.points[0].y = self.rlenv.virtual_peppers[i].pos[1]
                obst.polygon.points[0].z = 0

                obst.radius = self.rlenv.vp_radii[i]

                yaw = self.rlenv.virtual_peppers[i].pos[2]
                q = quaternion_from_euler(0,0,yaw)
                obst.orientation = Quaternion(*q)

                obst.velocities.twist.linear.x = self.rlenv.virtual_peppers[i].vel[0]
                obst.velocities.twist.linear.y = self.rlenv.virtual_peppers[i].vel[1]
                obst.velocities.twist.linear.z = 0
                obst.velocities.twist.angular.x = 0
                obst.velocities.twist.angular.y = 0
                obst.velocities.twist.angular.z = self.rlenv.virtual_peppers[i].vel[2]
                obstacles_msg.obstacles.append(obst)
            self.obstpub.publish(obstacles_msg)
            return

    # callbacks
    def set_cmd_vel_callback(self, msg):
        if self.args.no_ros:
            raise NotImplementedError
        elif self.args.cmd_vel:
            self.latest_cmd_vel = np.array([msg.linear.x, msg.linear.y, msg.angular.z])
            return
        else:
            return
    def publish_goal_callback(self, event=None):
        self.publish_goal(self.rlenv.agent_goals[0])
    def gestures_callback(self, msg):
        gesture = msg.data
        if gesture == "animations/Stand/Gestures/Desperate_4":
            print("starting gesture episode")
            self.is_in_gesture_episode = True
            self.sleep_for_simtime(5.)
            self.is_in_gesture_episode = False
            print("ending gesture episode")
    def speech_callback(self, msg):
        speech = msg.data
        if "xcuse me" in speech:
            print("starting speech episode")
            self.is_in_speech_episode = True
            self.sleep_for_simtime(5.)
            self.is_in_speech_episode = False
            print("ending speech episode")
    def task_update_routine(self):
        import multiprocessing
        data_queue = multiprocessing.Queue()
        is_done_queue = multiprocessing.Queue()
        rate = WalltimeRate(10)
        # get fixed_state once and for all
        if True:  # replace this with for loop if separate fixed_state for each agent
            agent_index = 0
            _, self.fixed_state[0] = state_estimation.state_estimate_from_sim_worldstate(
                agent_index, self.iaenv.worldstate)
            # otherwise subprocesses keep recomputing
            _ = self.fixed_state[0].derived_state.map_sdf(self.fixed_state[0])
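        # each cycle, agent task planning runs in a worker process; its results come back
        # through data_queue / is_done_queue and are applied to this IARLEnv's task tables below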
        while True:
            tic = timer()
            t = multiprocessing.Process(target=self.update_agent_tasks, args=(data_queue,is_done_queue))
            t.start()
            toc = timer()
            if False:
                print("Process creation: {}ms".format(1000 * (toc-tic)))
            # check whether process is finished
            is_done_queue.get()
            # extract output data from process and apply changes
            while not data_queue.empty():
                update = data_queue.get()
                is_update = update[1]
                if is_update:
                    (agent_index, _, time_now, state_e, task) = update
                    self.state_estimate_times[agent_index] = time_now
                    self.state_estimates[agent_index] = unserialize_cstate(state_e)
                    self.current_tasks[agent_index] = task
                    self.task_start_times[agent_index] = self.get_sim_time()
            #
            t.join(timeout=1.)
            rate.sleep()
            if self.exit:
                break


    # Other Methods
    def get_agent_radius(self, agent_index):
        return self.iaenv.worldstate.get_agents()[agent_index].radius
    def _xystates(self):
        return np.array(self.iaenv.worldstate.get_xystates())
    def _permissivities(self):
        return [s.permissivity for s in self.iaenv.worldstate.get_innerstates()]
    def reset_loop(self):
        for i in range(1, self.n_sim_agents):
            self.task_start_times[i] = None
            self.current_tasks[i] = None
        self.collision_episodes[:,:] = 0
        # reset data in the two simulators
        for i in range(1, self.n_sim_agents):
            self.iaenv.worldstate.get_xystates()[i] = self.xystates0[i] * 1.
        new_agents_pos = list(self.agents_pos0 * 1.)
        new_agents_pos[0] = None
        self.rlenv._set_agents_pos(new_agents_pos)
    def rewind_robot(self):
        """ Basically, set robot goal to start position, set starting position to previous goal"""
        self.state_estimate_times[0] = None
        self.state_estimates[0] = None
        self.task_start_times[0] = None
        self.current_tasks[0] = None
        self.collision_episodes[0,:] = 0
        self.collision_episodes[:,0] = 0
        # reset data in the two simulators
        self.iaenv.worldstate.get_innerstates()[0].goal = self.next_robot_goal[:2] * 1.
        new_agents_goals = [None for _ in range(self.n_sim_agents)]
        new_agents_goals[0] = self.next_robot_goal * 1.
        self.rlenv._set_agents_goals(new_agents_goals)
        self.publish_goal(new_agents_goals[0])
        # swap next with current
        temp = self.next_robot_goal * 1.
        self.next_robot_goal = self.current_agent_goals[0] * 1.
        self.current_agent_goals[0] = temp * 1.

    def update_agent_task(self, agent_index):
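        # estimate this agent's state, then assign a task: Loiter if its goal is reached,
        # otherwise plan a new one (full IA planning, or a naive dijkstra path for non-robot
        # agents when naive_plan is set)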
        current_task = self.current_tasks[agent_index]
        last_se_time = self.state_estimate_times[agent_index]
        time_now = self.get_sim_time_sec()
        last_state = self.state_estimates[agent_index]
        worldstate = self.iaenv.worldstate
        fixed_state = self.fixed_state[0]
        are_goals_reached = self.rlenv.are_goals_reached()
        args = self.args
        # ---------------------------
        if current_task is not None:
            return (agent_index, False)
        # Get state estimate
        time_elapsed = None
        if last_se_time is not None:
            time_elapsed = time_now - last_se_time
        if args.naive_plan and fixed_state is not None and agent_index != 0 and STATELESS_NAIVE_PLAN:
            # create fake state estimate (ignore agents and knowledge) for speedup
            pos_xy = worldstate.get_xystates()[agent_index]  # + Noise?
            pos_ij = worldstate.map.xy_to_floatij([pos_xy])[0]
            from cia import CState
            state_e = CState(
                radius=worldstate.get_agents()[agent_index].radius,
                pos=np.array(pos_xy, dtype=np.float32),
                pos_ij=np.array(pos_ij, dtype=np.float32),
                mapwidth=worldstate.map.occupancy().shape[0],
                mapheight=worldstate.map.occupancy().shape[1],
                goal=np.array(worldstate.get_innerstates()[agent_index].goal, dtype=np.float32),
            )
        else:
            state_e, fixed_state = state_estimation.state_estimate_from_sim_worldstate(
                agent_index, worldstate,
                state_memory=last_state,
                time_elapsed=time_elapsed,
                reuse_state_cache=fixed_state)
        # if agent is at goal, switch to loiter mode
        goalstate = np.array(worldstate.get_innerstates()[agent_index].goal)
        if are_goals_reached[agent_index]:
            if self.args.verbose:
                print("{:.1f} | {}: Agent {} | goal reached.".format(
                    self.get_walltime_since_sim_start_sec(), self.get_sim_time_sec(), agent_index))
            task = {
                'tasktargetpathidx': 0,
                'taskfullpath': np.array([goalstate]),
                'taskaction': actions.Loiter(),  # action should go here but we want things serializable
            }
        else:
            # get task using ia planning
            possible_actions = SIM_AGENTS_POSSIBLE_ACTIONS
            if agent_index == 0:
                possible_actions = ROBOT_POSSIBLE_ACTIONS
            tic = timer()
            if args.naive_plan and agent_index != 0:
                from pyIA import paths
                agent_ij = state_e.get_pos_ij()
                goal_ij = fixed_state.map.xy_to_ij([state_e.get_goal()])[0]
                dijkstra = None
                if self.dijkstra_caches[agent_index] is not None:
                    if np.allclose(goal_ij, self.dijkstra_caches[agent_index]["goal_ij"]):
                        dijkstra = self.dijkstra_caches[agent_index]["field"]
                if dijkstra is None:
                    fixed_state.derived_state.suppress_warnings()
                    dijkstra = fixed_state.derived_state.dijkstra_from_goal(state_e, fixed_state, paths.NaivePath())
                    self.dijkstra_caches[agent_index] = {"goal_ij": goal_ij, "field": dijkstra}
                from CMap2D import path_from_dijkstra_field
                path_ij, _ = path_from_dijkstra_field(dijkstra, agent_ij, connectedness=8)
                path_xy = fixed_state.map.ij_to_xy(path_ij)
                paths_xy = [np.array(path_xy, dtype=np.float32)]
#                 possible_path_variants = [paths.NaivePath()]
#                 paths_xy, path_variants = ia_planning.path_options(
#                     state_e, fixed_state, possible_path_variants=possible_path_variants)
                taskpath = paths_xy[0]
                task = {
                    'taskfullpath': taskpath,
                    'tasktargetpathidx': len(taskpath)-1,
                    'taskaction': actions.Intend(),
                }
                optimistic_sequence = [[task, None]]
            else:
                byproducts = {}
                pruned_stochastic_tree, optimistic_sequence, firstfailure_sequence = \
                    ia_planning.plan(
                        state_e, fixed_state, possible_actions,
                        n_trials=args.n_trials, max_depth=args.max_depth,
                        BYPRODUCTS=byproducts,
                    )
            planning_time = timer()-tic
            # DEBUG
            if args.debug:
                if agent_index == 0:
                    ia_planning.visualize_planning_process(
                        state_e, fixed_state, possible_actions,
                        pruned_stochastic_tree, optimistic_sequence, firstfailure_sequence, byproducts,
                        env=self.iaenv,
                    )
                    plt.show()
            # Assign task
            task = optimistic_sequence[0][0]
            task['tasktargetpathidx'] = min(10, task['tasktargetpathidx']) # limit task length in space
            if self.args.verbose:
                print("{:.1f} | {}: Agent {} | planning: {:.1f}s | new task: {}".format(
                    self.get_walltime_since_sim_start_sec(), self.get_sim_time_sec(), agent_index,
                    planning_time, task['taskaction'].typestr()))
        # direct update (does nothing if this is running in subprocess)
        self.state_estimate_times[agent_index] = time_now
        self.state_estimates[agent_index] = state_e
        self.fixed_state[0] = fixed_state
        self.current_tasks[agent_index] = task
        self.task_start_times[agent_index] = self.get_sim_time()
        # return values are unused unless multiprocessing is enabled
        return (agent_index, True, time_now, state_e.serialize(), task)
    def update_agent_tasks(self, queue=None, is_done_queue=None):
        for i in range(1, self.n_sim_agents):
            result = self.update_agent_task(i)
            if queue is not None:
                queue.put(result)
        if is_done_queue is not None:
            is_done_queue.put("done")
            queue.close()
            is_done_queue.close()

    def reset_task(self, agent_index):
        self.current_tasks[agent_index] = None
        self.task_start_times[agent_index] = None
    def execute_current_task(self, agent_index):
        # TODO run a task-specific controller instead
        task = self.current_tasks[agent_index]
        task_start = self.task_start_times[agent_index]
        if task is None or task_start is None:
            return np.array([0,0,0])
        # get action from current task
        N = int(NAIVE_AGENT_LIVE_WAYPOINT_DISTANCE / self.iaenv.worldstate.map.resolution())
        waypoint_idx = min(task['tasktargetpathidx'], N)
        waypoint = task['taskfullpath'][waypoint_idx]
        vp_pos = self.rlenv.virtual_peppers[agent_index].pos
        waypoint_in_vp_frame = pose2d.apply_tf(waypoint, pose2d.inverse_pose2d(vp_pos))
        theta = np.arctan2(waypoint_in_vp_frame[1], waypoint_in_vp_frame[0])
        delta = np.array([waypoint_in_vp_frame[0], waypoint_in_vp_frame[1], theta])
        action = delta
        # apply actions to iaenv, such as SAY
#         TODO
        # end task
        task_elapsed = self.get_sim_time() - task_start
        if task_elapsed > self.get_max_task_alotted_time():
            self.reset_task(agent_index)
            if self.args.verbose:
                print("{:.1f} | {}: Agent {} | task timed-out".format(
                    self.get_walltime_since_sim_start_sec(), self.get_sim_time_sec(), agent_index))
            # reset blocked collision episodes for agent (new chance to be allowed through)
            for j in range(self.n_sim_agents):
                if agent_index == j:
                    continue
                if self.collision_episodes[agent_index, j] == 1:
                    self.collision_episodes[agent_index, j] = 0
                    self.collision_episodes[j, agent_index] = 0
        # we should stop, but I kind of like the idle movement. just slow down.
        if isinstance(task['taskaction'], actions.Loiter):
            action = action * 0.8
        else:
            # check if task succeeded
            if np.linalg.norm(vp_pos[:2] - waypoint) < self.get_agent_radius(agent_index):
                self.reset_task(agent_index)
                action = np.array([0,0,0])
                if self.args.verbose:
                    print("Agent {} | task succeeded".format(agent_index))
        return action
    def execute_current_tasks(self, robot_action):
        actions = np.zeros((self.n_sim_agents, 3)) # cmd_vel in SI units
        for i in range(1, self.n_sim_agents):
            actions[i, :] = self.execute_current_task(i)
        # convert actions
        if not self.args.cmd_vel:
            actions = self.rlenv.cmd_vel_to_normalized(actions)
        if not self.args.continuous:
            actions = self.rlenv.continuous_to_categorical(actions)
        actions[0, ...] = robot_action
        return actions

    def collision_update(self):
        """ checks if collision episodes are started or ended. A collision episode starts when
        agents get closer than a fixed threshold, and end when they are no longer inside this
        threshold.

        Pass-through probability is sampled once at the start of each episode,
        or when either agent's task is updated. Once sampled, it remains constant for the whole
        episode.
        """
        for i in range(self.n_sim_agents):
            for j in range(self.n_sim_agents):
                if i >= j: # avoid checking twice
                    continue
                xi = self._xystates()[i]
                ri = self.get_agent_radius(i)
                pi = self._permissivities()[i]
                xj = self._xystates()[j]
                rj = self.get_agent_radius(j)
                pj = self._permissivities()[j]
                if i == 0:  # robot
                    if self.is_in_gesture_episode:
                        # model pass-through behavior
                        self.collision_episodes[i,j] = 2
                        self.collision_episodes[j,i] = 2
                        continue
                    if self.is_in_speech_episode:
                        # model average positive reaction to speech
                        pi = np.sqrt(pi)
                        pj = np.sqrt(pj)
                if np.linalg.norm(xj - xi) < (2*rj + 2*ri):
                    if self.collision_episodes[i,j] == 0:
                        # enter collision episode
                        pmin = np.clip(min(pi, pj), 0, 1)
                        # sample collision
                        sample = np.random.random()
                        if sample < pmin:
                            # pass through
                            self.collision_episodes[i,j] = 2
                            self.collision_episodes[j,i] = 2
                        else:
                            # collision
                            self.collision_episodes[i,j] = 1
                            self.collision_episodes[j,i] = 1
#                         print(self._permissivities())
#                         print("episode | {} {} | pmin {} | sample {}".format(i, j, pmin, sample))
                else:
                    # exit collision episode
                    self.collision_episodes[i,j] = 0
                    self.collision_episodes[j,i] = 0
        if self.args.no_pass_through:
            self.collision_episodes[0,:] = 0
            self.collision_episodes[:,0] = 0
        disabled_collisions = self.collision_episodes == 2
        return disabled_collisions

    # RL Interface
    def reset(self, agents_mask=None, ONLY_FOR_AGENT_0=False):
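        # reset the underlying rlenv, restore scenario start positions and goals, clear
        # per-agent task/estimate caches, then take one null step to get an initial observation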
        _ = self.rlenv.reset(agents_mask=agents_mask)


        # reset to scene definition
        self.current_agent_goals = self.agents_goals0 * 1.
        self.next_robot_goal = self.agents_pos0[0] * 1. # set future goal to start position
        self.rlenv._set_agents_pos(self.agents_pos0 * 1.)
        self.rlenv._set_agents_goals(self.current_agent_goals * 1.)
        self.rlenv.DETERMINISTIC_GOAL = lambda i : self.agents_goals0[i] * 1.
        # Simulator state
        self.reset_time()
        self.state_estimate_times = [None for _ in range(self.n_sim_agents)]
        self.state_estimates = [None for _ in range(self.n_sim_agents)]
        self.dijkstra_caches = [None for _ in range(self.n_sim_agents)]
        self.task_start_times = [None for _ in range(self.n_sim_agents)]
        self.current_tasks = [None for _ in range(self.n_sim_agents)]
        self.collision_episodes = np.zeros((self.n_sim_agents, self.n_sim_agents)) # 0 none 1 collision 2 passthrough

        ob, rew, new, _ =  self.rlenv.step(None, ONLY_FOR_AGENT_0=ONLY_FOR_AGENT_0)
        # ob is [lidar_obs, robot_obs, (relobst_obs)]

        if ONLY_FOR_AGENT_0:
            pass
        else:
            ob = [obs[:1] for obs in ob]
        return ob
    def n_agents(self):
        """ returns the number of steerable agents in the environment """
        return 1
    def step(self, robot_action,
             ONLY_FOR_AGENT_0=False,
             ):
        """ robot_action is in the units expected by PepperRLEnv (see observation_space) """
        must_break = False
        if not self.args.plan_async:
            self.update_agent_tasks()
        # drive agents
        if robot_action is None:
            actions = None # time does not advance
        else:
            actions = self.execute_current_tasks(robot_action)
        # collisions
        disabled_collisions = self.collision_update()
        # step
        ob, rew, new, info = self.rlenv.step(actions,
             BYPASS_SI_UNITS_CONVERSION=self.args.cmd_vel,
             DISABLE_A2A_COLLISION=disabled_collisions,
             ONLY_FOR_AGENT_0=ONLY_FOR_AGENT_0)
        are_goals_reached = self.rlenv.are_goals_reached()
        self.publish_obstacle_msg()
        if robot_action is not None:
            self.increment_time()
        # update iaenv worldstate
        for i in range(self.n_sim_agents):
            self.iaenv.worldstate.get_xystates()[i] = self.rlenv.virtual_peppers[i].pos[:2]
        # all agents (robot excluded) have reached their goal
        if np.all(are_goals_reached[1:]):
            self.reset_loop()
        if are_goals_reached[0] == 1:
            if self.args.verbose:
                print("Agent 0 : goal reached.")
            if not self.args.no_ros:
                msg = Header()
                msg.stamp = self.get_sim_time()
                msg.frame_id = "goal 0 is reached"
                self.goalreachedpub.publish(msg)
                time.sleep(0.1)
            if self.args.shutdown_on_success:
                self.shutdown()
            else:
                self.rewind_robot()
        if self.current_sim_time >= self.args.max_runtime:
            self.shutdown()
        if ONLY_FOR_AGENT_0:
            pass
        else:
            ob = [obs[:1] for obs in ob]
            rew = rew[:1]
            new = new[:1]
        return ob, rew, new, info

    # Other Interface
    def run(self):
        """ typically when run in ROS, this starts the simulator update routine """
        try:
            if self.args.plan_async:
                import threading
                threading.Thread(target=self.task_update_routine).start()
            while True:
                if self.exit:
                    break
                # Get robot action
                if self.args.autopilot:
                    # drive robot internally
                    self.update_agent_task(0)
                    robot_action = self.execute_current_task(0)
                elif self.args.cmd_vel:
                    robot_action = self.latest_cmd_vel * 1.
                else:
                    raise NotImplementedError("Robot control mode not specified (e.g. --cmd-vel or --autopilot)")
                # step environment
                ob, rew, new, _ = self.step(robot_action, ONLY_FOR_AGENT_0=True)
                if new:
                    break
                if self.args.realtime:
                    wall_t = self.get_walltime_since_sim_start_sec()
                    sim_t = self.get_sim_time_sec()
                    if sim_t < (wall_t - 1):
                        rospy.logwarn_throttle(5., "IARLEnv missed realtime rate.")
                    elif sim_t > (wall_t + 0.1):
                        time.sleep(sim_t - (wall_t + 0.09))
        except KeyboardInterrupt:
            if self.args.plan_async:
                self.exit = True
            if not self.args.no_ros:
                self.rlenv._ros_shutdown("SIGINT")

    def shutdown(self):
        if not self.args.no_ros:
            rospy.signal_shutdown("OK")
        self.exit = True

    def render(self, *args, **kwargs):
        self.rlenv.render(*args, **kwargs)

    def seed(self, seed):
        pass
        # TODO

    def close(self):
        self.shutdown()
        self.rlenv.close()

    def _get_viewer(self):
        return self.rlenv.viewer
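

# Minimal usage sketch, assuming parse_sim_args() provides the fields referenced above
# (dt, no_ros, autopilot or cmd_vel, plan_async, realtime, ...). With autopilot enabled the
# robot is driven by the internal planner, and run() loops step() until shutdown.
if __name__ == "__main__":
    from pyIA.arguments import parse_sim_args
    args = parse_sim_args()
    ianenv = IARLEnv(args, silent=False)
    try:
        ianenv.run()    # blocks until shutdown() or Ctrl-C
    finally:
        ianenv.close()  # stops ROS (if enabled) and the underlying PepperRLEnv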

        # figure title
        if self.selected is not None:
            self.main_axes.set_title("""selected mode: middle click to set agent goal, left click to create new agent,
                               left click on agent to toggle selection, right click to unselect""")
        else:
            self.main_axes.set_title(
                "left click to create agent, right click to delete, click agent to select")
        if self.agent_creation_mode[0]:
            self.main_axes.set_title("""agent creation mode: left click to confirm,
                                     middle click to cycle agent type, right click to cancel""")
        # update
        self.fig.canvas.draw()


if __name__ == "__main__":
    args = parse_sim_args()
    env = Environment(args)
    env.populate()

    weg = WorldEditorGui(env, debug=args.debug)
    weg.run()

    yesno = input("Save worldstate to file? [Y/n]")
    if yesno not in ["n", "N", "no", "NO"]:
        name = input("Filename for worldstate: \n>>")
        filepath = os.path.join(args.scenario_folder, name + ".pickle")
        env.worldstate.save_agents_to_file(filepath)
Example #6
import matplotlib
import matplotlib.pyplot as plt

from pyIA.arguments import parse_sim_args
from pyIA.simulator import Environment
import pyIA.agents as agents
import pyIA.state_estimation as state_estimation
import pyIA.paths as paths
import pyIA.ia_planning as ia_planning

matplotlib.rcParams['mathtext.fontset'] = 'stix'
matplotlib.rcParams['font.family'] = 'STIXGeneral'

mapnames = ["asl", "unity_scene_map", "asl_office_j"]

# map name would normally be given as an arg; here it is overridden for each map in mapnames

if __name__ == "__main__":
    args = parse_sim_args()
    print("------------------------------------------------------------")
    plt.figure()
    for mapname in mapnames:
        print(mapname)
        args.map_name = mapname
        env = Environment(args, silent=True)
        contours = env.worldstate.map.as_closed_obst_vertices()
        env.worldstate.map.plot_contours(contours)
        plt.axis('off')
    plt.gca().set_xticklabels([])
    plt.gca().set_yticklabels([])
    plt.show()
from timeit import default_timer as timer
import matplotlib.pyplot as plt
import matplotlib
from pyniel.pyplot_tools.realtime import plotpause, plotshow
from pyniel.pyplot_tools.windows import movefigure
from pyniel.pyplot_tools.colormaps import Greys_c10
from CMap2D import path_from_dijkstra_field

from pyIA.arguments import parse_sim_args
from pyIA.simulator import Environment, Worldstate

args = parse_sim_args()
env = Environment(args)

# Add agents and precompute dijkstras
tic = timer()
env.populate()
toc = timer()
print("Environment populated. ({:.2f}s)".format(toc-tic))

# Visuals
# fig = plt.figure("situation")
# movefigure(fig, (100, 100))
# fig = plt.figure("dijkstra")
# movefigure(fig, (500, 100))
# fig = plt.figure("density")
# movefigure(fig, (100, 500))
try:
    for i in range(100):
        S_w = env.fullstate()
        actions = env.policies(S_w)