Beispiel #1
0
    def __init__(self,
                 robot_name_space,
                 controllers_list,
                 reset_controls,
                 start_init_physics_parameters=True,
                 reset_world_or_sim="SIMULATION",
                 **kwargs):

        env_id = kwargs.get('env_id', 1)
        self.env_id = env_id
        self.robotID = kwargs.get('robotID', 1)
        self.num_robots = kwargs.get('num_robots', 1)
        bashCommand = 'hostname -I | cut -d' ' -f1)'
        ROSIP = socket.gethostbyname(socket.gethostname())
        if env_id >= 0:
            #os.environ['ROS_PACKAGE_PATH']=ROS_PACKAGE_PATH
            #@HACK Setting ros and gazebo masters can be automated
            #the gazebo master is different for different environment IDs as they can run on multiple computers
            os.environ['ROS_MASTER_URI'] = "http://" + ROSIP + ":1131" + str(
                env_id)[0]
            if env_id < 3:
                GAZEBOIP = ROSIP
                os.environ[
                    'GAZEBO_MASTER_URI'] = "http://" + GAZEBOIP + ":1135" + str(
                        env_id)[0]
            else:
                GAZEBOIP = ROSIP
                os.environ[
                    'GAZEBO_MASTER_URI'] = "http://" + GAZEBOIP + ":1135" + str(
                        env_id)[0]
            os.environ['ROS_IP'] = ROSIP
            os.environ['ROS_HOSTNAME'] = ROSIP
            rospy.init_node('firefly_env_' + str(env_id)[0] +
                            str(self.robotID),
                            anonymous=True,
                            disable_signals=True)
            print("WORKER NODE " + str(env_id)[0] + str(self.robotID))

        # To reset Simulations
        rospy.logdebug("START init RobotGazeboEnv")
        self.gazebo = GazeboConnection(start_init_physics_parameters,
                                       reset_world_or_sim, self.robotID)
        self.controllers_object = ControllersConnection(
            namespace=robot_name_space, controllers_list=controllers_list)
        self.reset_controls = reset_controls
        self.seed()

        # Set up ROS related variables
        self.episode_num = 0
        self.cumulated_episode_reward = 0
        #        self.reward_pub = rospy.Publisher('/openai/reward', RLExperimentInfo, queue_size=1)
        rospy.logdebug("END init RobotGazeboEnv")
Beispiel #2
0
    def __init__(self, robot_name_space, controllers_list, reset_controls):

        # To reset Simulations
        print("Entered Gazebo Env")
        self.gazebo = GazeboConnection(start_init_physics_parameters=False,
                                       reset_world_or_sim=True)
        self.controllers_object = ControllersConnection(
            namespace=robot_name_space, controllers_list=controllers_list)
        self.reset_controls = reset_controls
        print(self.reset_controls)
        self.seed()

        # Set up ROS related variables
        self.episode_num = 0
    def __init__(self, reset_type: str = 'SIMULATION'):
        """
        Initialize RosbotGazeboEnv class

        Parameters
        ----------
        reset_type: str
            This paremeter is used for creating GazeboConnection instance
            Possible values are: ['SIMULATION', 'WORLD']

        """

        # create GazeboConnection instance
        self.gazebo = GazeboConnection(reset_type=reset_type)

        self.seed()
Beispiel #4
0
    def __init__(self, world_file_name, robot_number=0):
        """
        This Task Env is designed for having the TurtleBot2 in some kind of maze.
        It will learn how to move around the maze without crashing.
        """

        # Only variable needed to be set here
        self.world_file_name = world_file_name
        number_actions = rospy.get_param('/turtlebot2/n_actions', 144)
        self.action_space = spaces.Discrete(number_actions)

        # We set the reward range, which is not compulsory but here we do it.
        self.reward_range = (-numpy.inf, numpy.inf)

        #number_observations = rospy.get_param('/turtlebot2/n_observations')
        """
        We set the Observation space for the 6 observations
        cube_observations = [
            round(current_disk_roll_vel, 0),
            round(y_distance, 1),
            round(roll, 1),
            round(pitch, 1),
            round(y_linear_speed,1),
            round(yaw, 1),
        ]
        """

        # Actions and Observations
        self.dec_obs = rospy.get_param(
            "/turtlebot2/number_decimals_precision_obs", 1)
        self.linear_forward_speed = rospy.get_param(
            '/turtlebot2/linear_forward_speed', 1)
        self.linear_turn_speed = rospy.get_param(
            '/turtlebot2/linear_turn_speed', 0.2)
        self.angular_speed = rospy.get_param('/turtlebot2/angular_speed', 0.1)
        self.init_linear_forward_speed = rospy.get_param(
            '/turtlebot2/init_linear_forward_speed', 0.3)
        self.init_linear_turn_speed = rospy.get_param(
            '/turtlebot2/init_linear_turn_speed', 0.4)

        self.n_laser_discretization = rospy.get_param(
            '/turtlebot2/n_laser_discretization', 128)
        self.n_observations = rospy.get_param('/turtlebot2/n_observations',
                                              144)
        self.min_range = rospy.get_param('/turtlebot2/min_range', 0.3)
        self.max_cost = rospy.get_param('/turtlebot2/max_cost', 1)
        self.min_cost = rospy.get_param('/turtlebot2/min_cost', 0)
        self.n_stacked_frames = rospy.get_param('/turtlebot2/n_stacked_frames',
                                                10)
        self.n_skipped_frames = rospy.get_param('/turtlebot2/n_skipped_frames',
                                                4)

        self.max_linear_speed = rospy.get_param('/turtlebot2/max_linear_speed',
                                                0.65)
        self.max_angular_speed = rospy.get_param(
            '/turtlebot2/max_angular_speed', 1)

        self.min_linear_speed = rospy.get_param('/turtlebot2/min_linear_speed',
                                                0)
        self.min_angular_speed = rospy.get_param(
            '/turtlebot2/min_angular_speed', 0)

        self.robot_number = robot_number
        self._get_goal_location()

        self.pedestrians_info = {}
        self.pedestrians_info["4_robot_3D1P"] = {}
        self.pedestrians_info["train2"] = {}
        self.pedestrians_info["zigzag_3ped"] = {}

        self.pedestrian_pose = {}
        # self.robot_pose = {}

        # Here we will add any init functions prior to starting the MyRobotEnv
        self._get_init_pose()
        super(TurtleBot2MazeEnv, self).__init__(robot_number=robot_number,
                                                initial_pose=self.initial_pose)

        self.gazebo = GazeboConnection(start_init_physics_parameters=True,
                                       robot_number=self.robot_number,
                                       initial_pose=self.initial_pose,
                                       reset_world_or_sim="ROBOT")

        # We create two arrays based on the binary values that will be assigned
        # In the discretization method.
        #laser_scan = self._check_laser_scan_ready()
        laser_scan = self.get_laser_scan()
        rospy.logdebug("laser_scan len===>" + str(len(laser_scan.ranges)))

        # Laser data
        self.laser_scan_frame = laser_scan.header.frame_id

        # Number of laser reading jumped
        self.new_ranges = int(
            math.ceil(
                float(len(laser_scan.ranges)) /
                float(self.n_laser_discretization)))

        rospy.logdebug("n_observations===>" + str(self.n_observations))
        rospy.logdebug("new_ranges, jumping laser readings===>" +
                       str(self.new_ranges))

        high = numpy.full((self.n_observations), self.max_cost)
        low = numpy.full((self.n_observations), self.min_cost)

        # We only use two integers
        self.observation_space = spaces.Box(low, high)

        v_list_high = numpy.full((self.n_observations, self.n_stacked_frames),
                                 self.max_linear_speed)
        w_list_high = numpy.full((self.n_observations, self.n_stacked_frames),
                                 self.max_angular_speed)
        cost_list_high = numpy.full(
            (self.n_observations, self.n_stacked_frames), 1)
        obst_list_high = numpy.full(
            (self.n_observations, self.n_stacked_frames), 1)
        to_goal_list_high = numpy.full(
            (self.n_observations, self.n_stacked_frames), 1)

        v_list_low = numpy.full((self.n_observations, self.n_stacked_frames),
                                self.min_linear_speed)
        w_list_low = numpy.full((self.n_observations, self.n_stacked_frames),
                                self.min_angular_speed)
        cost_list_low = numpy.full(
            (self.n_observations, self.n_stacked_frames), 0)
        obst_list_low = numpy.full(
            (self.n_observations, self.n_stacked_frames), 0)
        to_goal_list_low = numpy.full(
            (self.n_observations, self.n_stacked_frames), 0)

        high = numpy.stack(
            (v_list_high, w_list_high, obst_list_high, to_goal_list_high),
            axis=2)
        low = numpy.stack(
            (v_list_low, w_list_low, obst_list_low, to_goal_list_low), axis=2)

        #MLP obs space
        # high = numpy.full((2*self.n_observations*self.n_stacked_frames, 1), 1)
        # low = numpy.full((2*self.n_observations*self.n_stacked_frames, 1), 0)

        self.observation_space = spaces.Box(low, high)

        rospy.logdebug("ACTION SPACES TYPE===>" + str(self.action_space))
        rospy.logdebug("OBSERVATION SPACES TYPE===>" +
                       str(self.observation_space))

        # Rewards
        self.forwards_reward = rospy.get_param("/turtlebot2/forwards_reward",
                                               5)
        self.invalid_penalty = rospy.get_param("/turtlebot2/invalid_penalty",
                                               20)
        self.end_episode_points = rospy.get_param(
            "/turtlebot2/end_episode_points", 2000)
        self.goal_reaching_points = rospy.get_param(
            "/turtlebot2/goal_reaching_points", 500)

        self.cumulated_steps = 0.0

        # Collision danger reward ---- checks the lidar scan and penalizes accordingly
        self.select_collision_danger_cost = True
        self.collision_danger_cost = 0
        self.prox_penalty1 = -1
        self.prox_penalty2 = -3.5
        self.closeness_threshold = 2.0

        self.laser_filtered_pub = rospy.Publisher(
            '/turtlebot' + str(robot_number) + '/laser/scan_filtered',
            LaserScan,
            queue_size=1)
        self.goal_reaching_status_pub = rospy.Publisher(
            '/turtlebot' + str(robot_number) + '/goal_reaching_status',
            Bool,
            queue_size=1)
        self.visualize_obs = False
        self.list_angular_vel = []
        self.list_vel = []

        rospy.Subscriber("/gazebo/model_states", ModelStates,
                         self.callback_modelstates)

        if self.visualize_obs:
            os.chdir("../")
            if not os.path.isdir("observation_visualization"):
                os.mkdir("observation_visualization")
                os.chdir("observation_visualization")
                os.mkdir("v")
                os.mkdir("w")
                os.mkdir("cost")
                os.mkdir("obst")
                os.mkdir("toGoal")
                os.chdir("../")
                os.chdir("src")
            else:
                os.chdir("src")
        self.episode_num = 0
        self.total_collisions = 0
        self.episode_collisions = 0
        self.n_skipped_count = 0
        self.goal_reaching_status = Bool()
        self.goal_reaching_status.data = False
Beispiel #5
0
class RobotGazeboEnv(gym.Env):
    def __init__(self,
                 robot_name_space,
                 controllers_list,
                 reset_controls,
                 start_init_physics_parameters=True,
                 reset_world_or_sim="SIMULATION",
                 **kwargs):

        env_id = kwargs.get('env_id', 1)
        self.env_id = env_id
        self.robotID = kwargs.get('robotID', 1)
        self.num_robots = kwargs.get('num_robots', 1)
        bashCommand = 'hostname -I | cut -d' ' -f1)'
        ROSIP = socket.gethostbyname(socket.gethostname())
        if env_id >= 0:
            #os.environ['ROS_PACKAGE_PATH']=ROS_PACKAGE_PATH
            #@HACK Setting ros and gazebo masters can be automated
            #the gazebo master is different for different environment IDs as they can run on multiple computers
            os.environ['ROS_MASTER_URI'] = "http://" + ROSIP + ":1131" + str(
                env_id)[0]
            if env_id < 3:
                GAZEBOIP = ROSIP
                os.environ[
                    'GAZEBO_MASTER_URI'] = "http://" + GAZEBOIP + ":1135" + str(
                        env_id)[0]
            else:
                GAZEBOIP = ROSIP
                os.environ[
                    'GAZEBO_MASTER_URI'] = "http://" + GAZEBOIP + ":1135" + str(
                        env_id)[0]
            os.environ['ROS_IP'] = ROSIP
            os.environ['ROS_HOSTNAME'] = ROSIP
            rospy.init_node('firefly_env_' + str(env_id)[0] +
                            str(self.robotID),
                            anonymous=True,
                            disable_signals=True)
            print("WORKER NODE " + str(env_id)[0] + str(self.robotID))

        # To reset Simulations
        rospy.logdebug("START init RobotGazeboEnv")
        self.gazebo = GazeboConnection(start_init_physics_parameters,
                                       reset_world_or_sim, self.robotID)
        self.controllers_object = ControllersConnection(
            namespace=robot_name_space, controllers_list=controllers_list)
        self.reset_controls = reset_controls
        self.seed()

        # Set up ROS related variables
        self.episode_num = 0
        self.cumulated_episode_reward = 0
        #        self.reward_pub = rospy.Publisher('/openai/reward', RLExperimentInfo, queue_size=1)
        rospy.logdebug("END init RobotGazeboEnv")
        # self.pretrained_model = PPO2.load("/home/rtallamraju/drl_ws/logs/parallel_ppo_cartesian_att3_try6/snapshots/trained_model.pkl")
        # self.pretrained_model = PPO2.load("/home/rtallamraju/drl_ws/logs/drl_singleagent_try9/snapshots/best_model.pkl")

    def create_circle(self, radius=8):
        self.theta = [k for k in np.arange(0, 6.28, 0.1)]
        x = radius * np.cos(self.theta)
        y = radius * np.sin(self.theta)
        self.init_circle = [x, y]

    # Env methods
    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        """
        Function executed each time step.
        Here we get the action execute it in a time step and retrieve the
        observations generated by that action.
        :param action:
        :return: obs, reward, done, info
        """
        """
        Here we should convert the action num to movement action, execute the action in the
        simulation and get the observations result of performing that action.
        """
        rospy.logdebug("START STEP OpenAIROS")

        self.gazebo.unpauseSim()
        self._set_action(action)
        obs = self._get_obs()
        done = self._is_done(obs)
        info = {}
        reward = self._compute_reward(obs, done)
        self.cumulated_episode_reward += reward

        rospy.logdebug("END STEP OpenAIROS")

        return obs, reward, done, info

    def reset(self, robotID=1):
        rospy.logwarn("Reseting RobotGazeboEnvironment")
        # if robotID == 1:
        self._reset_sim()
        self._init_env_variables()
        self._update_episode()
        obs = self._get_obs()
        rospy.logdebug("END Reseting RobotGazeboEnvironment")

        return obs

    def close(self):
        """
        Function executed when closing the environment.
        Use it for closing GUIS and other systems that need closing.
        :return:
        """
        rospy.logdebug("Closing RobotGazeboEnvironment")
        rospy.signal_shutdown("Closing RobotGazeboEnvironment")

    def _update_episode(self):
        """
        Publishes the cumulated reward of the episode and
        increases the episode number by one.
        :return:
        """
        # rospy.logwarn("PUBLISHING REWARD...")
        # self._publish_reward_topic(
        #                             self.cumulated_episode_reward,
        #                             self.episode_num
        #                             )
        rospy.logwarn("PUBLISHING REWARD...DONE=" +
                      str(self.cumulated_episode_reward) + ",EP=" +
                      str(self.episode_num))

        self.episode_num += 1
        self.cumulated_episode_reward = 0

    def _publish_reward_topic(self, reward, episode_number=1):
        """
        This function publishes the given reward in the reward topic for
        easy access from ROS infrastructure.
        :param reward:
        :param episode_number:
        :return:
        """
        reward_msg = RLExperimentInfo()
        reward_msg.episode_number = episode_number
        reward_msg.episode_reward = reward
        self.reward_pub.publish(reward_msg)

    # Extension methods
    # ----------------------------

    def _reset_sim(self):
        """Resets a simulation
        """
        rospy.logdebug("RESET SIM START")
        if self.reset_controls:
            rospy.logdebug("RESET CONTROLLERS")
            self.gazebo.unpauseSim()

            self.controllers_object.reset_controllers()
            self._check_all_systems_ready()
            self._set_init_pose()
            self.gazebo.pauseSim()
            self.gazebo.resetSim(self.robotID)
            self.gazebo.unpauseSim()
            self.controllers_object.reset_controllers()
            self._check_all_systems_ready()
            self.gazebo.pauseSim()

        else:
            # if self.robotID == 1:
            rospy.logwarn("DONT RESET CONTROLLERS")
            self.gazebo.unpauseSim()
            self._check_all_systems_ready()
            self._set_init_pose()
            self.gazebo.pauseSim()
            self.gazebo.resetSim(self.robotID)
            self.gazebo.unpauseSim()
            self._check_all_systems_ready()
            # self.gazebo.pauseSim()
            # else:
            # self.gazebo.unpauseSim()

        rospy.logdebug("RESET SIM END")
        return True

    def _set_init_pose(self):
        """Sets the Robot in its init pose
        """
        self.create_circle(radius=5)
        machine_name = '/machine_' + str(self.robotID)
        self.command_topic = machine_name + "/command"
        self._cmd_vel_pub = rospy.Publisher(self.command_topic,
                                            uav_pose,
                                            queue_size=1)
        outPose = uav_pose()
        outPose.header.stamp = rospy.Time.now()
        outPose.header.frame_id = "world"

        outPose.POI.x = 0
        outPose.POI.y = 0
        outPose.POI.z = 0

        r = 8
        t = np.random.choice(63, 1)
        outPose.position.x = r * np.cos(self.theta[t[0]])
        outPose.position.y = r * np.sin(self.theta[t[0]])
        outPose.position.z = -r
        self._cmd_vel_pub.publish(outPose)

    def _check_all_systems_ready(self):
        """
        Checks that all the sensors, publishers and other simulation systems are
        operational.
        """
        raise NotImplementedError()

    def _get_obs(self):
        """Returns the observation.
        """
        raise NotImplementedError()

    def _init_env_variables(self):
        """Inits variables needed to be initialised each time we reset at the start
        of an episode.
        """
        raise NotImplementedError()

    def _set_action(self, action):
        """Applies the given action to the simulation.
        """
        raise NotImplementedError()

    def _is_done(self, observations):
        """Indicates whether or not the episode is done ( the robot has fallen for example).
        """
        raise NotImplementedError()

    def _compute_reward(self, observations, done):
        """Calculates the reward to give based on the observations given.
        """
        raise NotImplementedError()

    def _env_setup(self, initial_qpos):
        """Initial configuration of the environment. Can be used to configure initial state
        and extract information from the simulation.
        """
        raise NotImplementedError()
Beispiel #6
0
class RobotGazeboEnv(gym.GoalEnv):
    def __init__(self, robot_name_space, controllers_list, reset_controls):

        # To reset Simulations
        print("Entered Gazebo Env")
        self.gazebo = GazeboConnection(start_init_physics_parameters=False,
                                       reset_world_or_sim=True)
        self.controllers_object = ControllersConnection(
            namespace=robot_name_space, controllers_list=controllers_list)
        self.reset_controls = reset_controls
        print(self.reset_controls)
        self.seed()

        # Set up ROS related variables
        self.episode_num = 0
        # self.reward_pub = rospy.Publisher('/openai/reward', RLExperimentInfo, queue_size=1)

    # Env methods
    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        """
        Function executed each time step.
        Here we get the action execute it in a time step and retrieve the
        observations generated by that action.
        :param action:
        :return: obs, reward, done, info
        """
        """
        Here we should convert the action num to movement action, execute the action in the
        simulation and get the observations result of performing that action.
        """
        print("Entered step")
        print("Unpause sim")
        self.gazebo.unpauseSim()
        print("Set action")
        print("Action:")
        print(action)
        self._set_action(action)
        print("Pause sim")
        #self.gazebo.pauseSim()
        print("Get Obs")
        obs = self._get_obs()
        print("Is done")
        done = self._is_done(obs)
        info = {}
        reward = self._compute_reward(obs, done)
        self._publish_reward_topic(reward, self.episode_num)

        return obs, reward, done, info

    def reset(self):
        rospy.logdebug("Reseting RobotGazeboEnvironment")
        print("Entered reset")
        self._reset_sim()
        self._init_env_variables()
        self._update_episode()
        obs = self._get_obs()
        return obs

    def close(self):
        """
        Function executed when closing the environment.
        Use it for closing GUIS and other systems that need closing.
        :return:
        """
        rospy.logdebug("Closing RobotGazeboEnvironment")
        rospy.signal_shutdown("Closing RobotGazeboEnvironment")

    def _update_episode(self):
        """
        Increases the episode number by one
        :return:
        """
        self.episode_num += 1

    def _publish_reward_topic(self, reward, episode_number=1):
        """
        This function publishes the given reward in the reward topic for
        easy access from ROS infrastructure.
        :param reward:
        :param episode_number:
        :return:
        """
        return
        reward_msg = RLExperimentInfo()
        reward_msg.episode_number = episode_number
        reward_msg.episode_reward = reward
        self.reward_pub.publish(reward_msg)

    # Extension methods
    # ----------------------------

    def _reset_sim(self):
        """Resets a simulation
        """
        if self.reset_controls:
            self.gazebo.unpauseSim()
            self.controllers_object.reset_controllers()
            self._check_all_systems_ready()
            self._set_init_pose()
            self.gazebo.pauseSim()
            self.gazebo.resetSim()
            self.gazebo.unpauseSim()
            self.controllers_object.reset_controllers()
            self._check_all_systems_ready()
            self.gazebo.pauseSim()

        else:
            self.gazebo.unpauseSim()

            self._check_all_systems_ready()
            self._set_init_pose()
            #self.gazebo.pauseSim()
            self.gazebo.resetWorld()
            #self.gazebo.unpauseSim()

            self._check_all_systems_ready()
            #self.gazebo.pauseSim()

        return True

    def _set_init_pose(self):
        """Sets the Robot in its init pose
        """
        raise NotImplementedError()

    def _check_all_systems_ready(self):
        """
        Checks that all the sensors, publishers and other simulation systems are
        operational.
        """
        raise NotImplementedError()

    def _get_obs(self):
        """Returns the observation.
        """
        raise NotImplementedError()

    def _init_env_variables(self):
        """Inits variables needed to be initialised each time we reset at the start
        of an episode.
        """
        raise NotImplementedError()

    def _set_action(self, action):
        """Applies the given action to the simulation.
        """
        raise NotImplementedError()

    def _is_done(self, observations):
        """Indicates whether or not the episode is done ( the robot has fallen for example).
        """
        raise NotImplementedError()

    def _compute_reward(self, observations, done):
        """Calculates the reward to give based on the observations given.
        """
        raise NotImplementedError()

    def _env_setup(self, initial_qpos):
        """Initial configuration of the environment. Can be used to configure initial state
        and extract information from the simulation.
        """
        raise NotImplementedError()
class RosbotGazeboEnv(gym.Env):
    """
        RosbotGazeboEnv class acts as abstract gym environment template
    """

    metadata = {'render.modes': ['human']}

    def __init__(self, reset_type: str = 'SIMULATION'):
        """
        Initialize RosbotGazeboEnv class

        Parameters
        ----------
        reset_type: str
            This paremeter is used for creating GazeboConnection instance
            Possible values are: ['SIMULATION', 'WORLD']

        """

        # create GazeboConnection instance
        self.gazebo = GazeboConnection(reset_type=reset_type)

        self.seed()

    def seed(self, seed=None):
        """
        Set the random seed value for the gym environment
        """
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def reset(self):
        """
        Override gym environment reset() with custom logic

        Returns
        -------
        obs:
            observation from the environment

        """

        # reset the gazebo simulation
        self._reset_sim()
        # get latest observation
        obs = self._get_obs()
        rospy.loginfo('status: environment is reset')
        return obs

    def close(self):
        """
        Override gym environment close() with custom logic

        """

        # initiate node shutdown
        rospy.signal_shutdown('closing RosbotGazeboEnv')
        rospy.loginfo('status: environment is closed')
        rospy.loginfo('======================================')

    def render(self, mode='human'):
        """
        Override gym environment render() with custom logic

        """

        super(RosbotGazeboEnv, self).render(mode=mode)

    def step(self, action):
        """
        Override gym environment step() with custom logic

        Parameters
        ----------
        action:
            action to be executed in the environment

        Returns
        -------
        obs:
            observation from the environment
        reward:
            amount of reward achieved by taking the action
        done:
            indicate whether or not episode is done
        info:
            diagnostic information for debugging

        """

        # execute the action
        self.gazebo.unpause_sim()
        self._set_action(action)
        self._check_all_systems_are_ready()  # get latest system data
        self.gazebo.pause_sim()

        # compute the required fields
        obs = self._get_obs()
        done = self._is_done()
        reward = self._compute_reward(obs, done)
        info = {}

        return obs, reward, done, info

    # ===== =====

    def _reset_sim(self):
        """
        Custom logic to reset the gazebo simulation

        """

        # pre-reset tasks
        self.gazebo.unpause_sim()
        self._check_all_systems_are_ready()
        self._set_init_pose()
        self.gazebo.pause_sim()

        # reset the gazebo
        #self.gazebo.reset_sim()

        self.gazebo.clear_all_spawned_models()
        # TODO: sdf_model should randomly change
        sdf_model = 'sample'
        self.gazebo.spawn_sdf_model(sdf_model, Pose())

        # set environment variables each time we reset
        self._init_env_variables()

        # check if everything working fine after reset
        self.gazebo.unpause_sim()
        self._check_all_systems_are_ready()
        self.gazebo.pause_sim()

    def _init_env_variables(self):
        """
        Initialize environment variables
        """
        raise NotImplementedError()

    def _set_init_pose(self):
        """
        Set the initial pose of the rosbot
        """
        raise NotImplementedError()

    def _get_obs(self):
        """
        Return the observation from the environment
        """
        raise NotImplementedError()

    def _check_all_systems_are_ready(self):
        """
        Checks all sensors and other simulation systems are operational
        """
        raise NotImplementedError()

    def _is_done(self):
        """
        Indicates whether or not the episode is done
        """
        raise NotImplementedError()

    def _compute_reward(self, observation, done):
        """
        Calculate the reward based on the observation
        """
        raise NotImplementedError()

    def _set_action(self, action):
        """
        Apply the give action to the environment
        """
        raise NotImplementedError()