Example 1
import time

import cv2
import gym
import numpy as np
from controller import Supervisor


class Atlas(gym.Env):
    """
        Y axis is the vertical axis.
        Base class for Webots actors in a Scene.
        These environments create single-player scenes and behave like normal Gym environments, if
        you don't use multiplayer.
    """

    electricity_cost = -2.0  # cost for using motors -- this parameter should be carefully tuned against the reward for making progress; other values are less important
    stall_torque_cost = -0.1  # cost for running electric current through a motor even at zero rotational speed, small
    foot_collision_cost = -1.0  # touching another leg or other objects; this cost makes the robot avoid smashing its feet into itself
    joints_at_limit_cost = -0.2  # discourage stuck joints

    episode_reward = 0

    frame = 0
    _max_episode_steps = 1000

    initial_y = None
    body_xyz = None
    joint_angles = None
    joint_exceed_limit = False
    ignore_frame = 1


    def __init__(self, action_dim, obs_dim):

        self.robot = Supervisor()
        solid_def_names = self.read_all_def()
        self.def_node_field_list = self.get_all_fields(solid_def_names)

        self.robot_node = self.robot.getFromDef('Atlas')

        self.boom_base = self.robot.getFromDef('BoomBase')
        self.boom_base_trans_field = self.boom_base.getField("translation")
        self.timeStep = int(self.robot.getBasicTimeStep() * self.ignore_frame) # ms
        self.find_and_enable_devices()

        high = np.ones([action_dim])
        self.action_space = gym.spaces.Box(-high, high, dtype=np.float32)
        high = np.inf*np.ones([obs_dim])
        self.observation_space = gym.spaces.Box(-high, high, dtype=np.float32)

    def read_all_def(self, file_name='../../worlds/atlas_change_foot.wbt'):
        no_solid_str_list = ['HingeJoint', 'BallJoint', 'Hinge2Joint', 'Shape', 'Group', 'Physics']
        with open(file_name) as f:
            content = f.readlines()
        # strip whitespace characters like `\n` at the end of each line
        content = [x.strip() for x in content]
        def_str_list = []
        for x in content:
            if 'DEF' in x:
                def_str_list.append(x)
        for sub_str in no_solid_str_list:
            for i in range(len(def_str_list)):
                if sub_str in def_str_list[i]:
                    def_str_list[i] = 'remove_def'
        solid_def_names = []
        for def_str in def_str_list:
            if 'remove_def' != def_str:
                def_str_temp_list = def_str.split()
                solid_def_names.append(def_str_temp_list[def_str_temp_list.index('DEF') + 1])
        print(solid_def_names)
        print('There are duplicates:', len(solid_def_names) != len(set(solid_def_names)))
        return solid_def_names
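    # Illustrative note on the parsing above, based on standard .wbt syntax:
    # a world-file line such as 'DEF BoomBase Solid {' passes the no-solid
    # filter, and the token after 'DEF' ('BoomBase') is appended to
    # solid_def_names.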

    def get_all_fields(self, solid_def_names):
        def_node_field_list = []
        for def_name in solid_def_names:
            def_node = self.robot.getFromDef(def_name)
            node_trans_field = def_node.getField("translation")
            node_rot_field = def_node.getField("rotation")
            # print(def_name)
            node_ini_trans = node_trans_field.getSFVec3f()
            node_ini_rot = node_rot_field.getSFRotation()

            def_node_field_list.append({'def_node': def_node,
                                        'node_trans_field': node_trans_field,
                                        'node_rot_field': node_rot_field,
                                        'node_ini_trans': node_ini_trans,
                                        'node_ini_rot': node_ini_rot})
        return def_node_field_list

    def reset_all_fields(self):
        for def_node_field in self.def_node_field_list:
            def_node_field['node_trans_field'].setSFVec3f(def_node_field['node_ini_trans'])
            def_node_field['node_rot_field'].setSFRotation(def_node_field['node_ini_rot'])

    def find_and_enable_devices(self):
        # inertial unit
        self.inertial_unit = self.robot.getInertialUnit("inertial unit")
        self.inertial_unit.enable(self.timeStep)

        # gps
        self.gps = self.robot.getGPS("gps")
        self.gps.enable(self.timeStep)

        # foot sensors

        self.fsr = [self.robot.getTouchSensor("RFsr"), self.robot.getTouchSensor("LFsr")]
        for i in range(len(self.fsr)):
            self.fsr[i].enable(self.timeStep)

        # all motors
        # motor_names = [# 'HeadPitch', 'HeadYaw',
        #                'LLegUay', 'LLegLax', 'LLegKny',
        #                'LLegLhy', 'LLegMhx', 'LLegUhz',
        #                'RLegUay', 'RLegLax', 'RLegKny',
        #                'RLegLhy', 'RLegMhx', 'RLegUhz',
        #                ]
        motor_names = [  # 'HeadPitch', 'HeadYaw',
            'BackLbz', 'BackMby', 'BackUbx', 'NeckAy',
            'LLegLax', 'LLegMhx', 'LLegUhz',
            'RLegLax', 'RLegMhx', 'RLegUhz',
        ]
        self.motors = []
        for i in range(len(motor_names)):
            self.motors.append(self.robot.getMotor(motor_names[i]))

        # leg pitch motors
        self.legPitchMotor = [self.robot.getMotor('RLegLhy'),
                              self.robot.getMotor('RLegKny'),
                              self.robot.getMotor('RLegUay'),
                              self.robot.getMotor('LLegLhy'),
                              self.robot.getMotor('LLegKny'),
                              self.robot.getMotor('LLegUay')]

        for i in range(len(self.legPitchMotor)):
            self.legPitchMotor[i].enableTorqueFeedback(self.timeStep)

        # leg pitch sensors
        self.legPitchSensor = [self.robot.getPositionSensor('RLegLhyS'),
                              self.robot.getPositionSensor('RLegKnyS'),
                              self.robot.getPositionSensor('RLegUayS'),
                              self.robot.getPositionSensor('LLegLhyS'),
                              self.robot.getPositionSensor('LLegKnyS'),
                              self.robot.getPositionSensor('LLegUayS')]
        for i in range(len(self.legPitchSensor)):
            self.legPitchSensor[i].enable(self.timeStep)


    def apply_action(self, a):
        assert np.isfinite(a).all()
        for n, j in enumerate(self.legPitchMotor):
            max_joint_angle = j.getMaxPosition()
            min_joint_angle = j.getMinPosition()
            mean_angle = 0.5 * (max_joint_angle + min_joint_angle)
            half_range_angle = 0.5 * (max_joint_angle - min_joint_angle)
            j.setPosition(mean_angle + half_range_angle * float(np.clip(a[n], -1, +1)))

            # joint_angle = self.read_joint_angle(joint_idx=n)
            # torque = 0.5 * j.getMaxTorque() * float(np.clip(a[n], -1, +1))
            # if joint_angle > max_joint_angle:
            #     j.setPosition(max_joint_angle - 0.1)
            #     # j.setTorque(-1.0 * abs(torque))
            # elif joint_angle < min_joint_angle:
            #     j.setPosition(min_joint_angle + 0.1)
            #     # j.setTorque(abs(torque))
            # else:
            #     j.setTorque(torque)


    def read_joint_angle(self, joint_idx):
        joint_angle = self.legPitchSensor[joint_idx].getValue() % (2.0 * np.pi)
        if joint_angle > np.pi:
            joint_angle -= 2.0 * np.pi
        max_joint_angle = self.legPitchMotor[joint_idx].getMaxPosition()
        min_joint_angle = self.legPitchMotor[joint_idx].getMinPosition()
        if joint_angle > max_joint_angle + 0.05 or joint_angle < min_joint_angle - 0.05:
            self.joint_exceed_limit = True
        return joint_angle
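    # Example of the wrapping above: a raw sensor value of 5.0 rad maps to
    # 5.0 % (2*pi) = 5.0, which exceeds pi, so it is shifted to
    # 5.0 - 2*pi ~ -1.28 rad, keeping joint_angle within [-pi, pi].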

    def calc_state(self):
        joint_states = np.zeros(2*len(self.legPitchMotor))
        # even elements [0::2] position, scaled to -1..+1 between limits
        for r in range(6):
            joint_angle = self.read_joint_angle(joint_idx=r)
            if r in [0, 3]:
                joint_states[2 * r] = (-joint_angle - np.deg2rad(35)) / np.deg2rad(80)
            elif r in [1, 4]:
                joint_states[2 * r] = 1 - joint_angle / np.deg2rad(75)
            elif r in [2, 5]:
                joint_states[2 * r] = -joint_angle / np.deg2rad(45)
        # odd elements  [1::2] angular speed, scaled to show -1..+1
        for r in range(6):
            if self.joint_angles is None:
                joint_states[2 * r + 1] = 0.0
            else:
                joint_states[2 * r + 1] = 0.5 * (joint_states[2*r] - self.joint_angles[r])

        self.joint_angles = np.copy(joint_states[0::2])
        self.joint_speeds = joint_states[1::2]
        self.joints_at_limit = np.count_nonzero(np.abs(joint_states[0::2]) > 0.99)

        self.joint_torques = np.zeros(len(self.legPitchMotor))
        for i in range(len(self.legPitchMotor)):
            self.joint_torques[i] = self.legPitchMotor[i].getTorqueFeedback() \
                                    / self.legPitchMotor[i].getAvailableTorque()

        if self.body_xyz is None:
            self.body_xyz = np.asarray(self.gps.getValues())
            self.body_speed = np.zeros(3)
        else:
            self.body_speed = (np.asarray(self.gps.getValues()) - self.body_xyz) / (self.timeStep * 1e-3)
            self.body_xyz = np.asarray(self.gps.getValues())

        body_local_speed = np.copy(self.body_speed)
        body_local_speed[0], body_local_speed[2] = self.calc_local_speed()

        # print('speed: ', np.linalg.norm(self.body_speed))
        y = self.body_xyz[1]
        if self.initial_y is None:
            self.initial_y = y

        self.body_rpy = self.inertial_unit.getRollPitchYaw()
        '''
        The roll angle indicates the unit's rotation angle about its x-axis,
        in the interval [-π,π]. The roll angle is zero when the InertialUnit is
        horizontal, i.e., when its y-axis has the opposite direction of the
        gravity (WorldInfo defines the gravity vector).

        The pitch angle indicates the unit's rotation angle about its z-axis,
        in the interval [-π/2,π/2]. The pitch angle is zero when the InertialUnit
        is horizontal, i.e., when its y-axis has the opposite direction of the
        gravity. If the InertialUnit is placed on the Robot with a standard
        orientation, then the pitch angle is negative when the Robot is going
        down, and positive when the robot is going up.

        The yaw angle indicates the unit orientation, in the interval [-π,π],
        with respect to WorldInfo.northDirection. The yaw angle is zero when the
        InertialUnit's x-axis is aligned with the north direction, it is π/2
        when the unit is heading east, and -π/2 when the unit is oriented
        towards the west. The yaw angle can be used as a compass.
        '''

        more = np.array([
            y - self.initial_y,
            0, 0,
            0.3 * body_local_speed[0], 0.3 * body_local_speed[1], 0.3 * body_local_speed[2],
            # 0.3 is just scaling typical speed into -1..+1, no physical sense here
            self.body_rpy[0] / np.pi, self.body_rpy[1] / np.pi], dtype=np.float32)

        self.feet_contact = np.zeros(2)
        for j in range(len(self.fsr)):
            self.feet_contact[j] = self.fsr[j].getValue()

        return np.clip(np.concatenate([more] + [joint_states] + [self.feet_contact]), -5, +5)
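    # The observation returned above has 22 values: 8 body values ("more"),
    # 12 joint position/speed values, and 2 foot-contact readings, all
    # clipped to [-5, +5].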

    def calc_local_speed(self):
        '''
        Progress in the potential field is speed*dt; typical speed is about
        2-3 meters per second, so this potential changes by 2-3 per frame
        (not per second). All rewards have reward/frame units and are close to 1.0.
        '''
        direction_r = self.body_xyz - np.asarray(self.boom_base_trans_field.getSFVec3f())
        # print('robot_xyz: {}, boom_base_xyz: {}'.format(self.body_xyz,
        #                                                 np.asarray(self.boom_base_trans_field.getSFVec3f())))
        direction_r = direction_r[[0, 2]] / np.linalg.norm(direction_r[[0, 2]])
        # rotate the radial direction by 90 degrees to obtain the tangential direction
        direction_t = np.dot(np.asarray([[0, 1], [-1, 0]]), direction_r.reshape((-1, 1)))
        return np.dot(self.body_speed[[0, 2]], direction_t), np.dot(self.body_speed[[0, 2]], direction_r)
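    # Illustrative check of the decomposition above (assumed geometry): with
    # the boom base at the origin and the robot at x=1, z=0, direction_r is
    # [1, 0] and direction_t is [0, -1], so a world-frame body speed of
    # (0, 0, -2) yields a tangential speed of +2 and a radial speed of 0.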

    def alive_bonus(self, y, pitch):
        return +1 if abs(y) > 0.5 and abs(pitch) < 1.0 else -1

    def render(self, mode='human'):
        file_name = 'img.jpg'
        self.robot.exportImage(file=file_name, quality=100)
        return cv2.imread(file_name, -1)

    def step(self, action):
        for i in range(self.ignore_frame):
            self.apply_action(action)
            simulation_state = self.robot.step(self.timeStep)
        state = self.calc_state()  # also calculates self.joints_at_limit
        # state[0] is body height above ground, body_rpy[1] is pitch
        alive = float(self.alive_bonus(state[0] + self.initial_y,
                                       self.body_rpy[1]))

        progress, _ = self.calc_local_speed()
        # print('progress: {}'.format(progress))

        feet_collision_cost = 0.0


        '''
        Assume a DC motor with a controller and reverse-current braking.
        '''
        electricity_cost = self.electricity_cost * float(np.abs(
            self.joint_torques * self.joint_speeds).mean())
        electricity_cost += self.stall_torque_cost * float(np.square(self.joint_torques).mean())

        joints_at_limit_cost = float(self.joints_at_limit_cost * self.joints_at_limit)

        rewards = [
            alive,
            progress,
            electricity_cost,
            joints_at_limit_cost,
            feet_collision_cost
        ]

        self.episode_reward += progress

        self.frame += 1

        done = (-1 == simulation_state) or (self._max_episode_steps <= self.frame) \
               or (alive < 0) or (not np.isfinite(state).all())
        # print('frame: {}, alive: {}, done: {}, body_xyz: {}'.format(self.frame, alive, done, self.body_xyz))
        # print('state_{} \n action_{}, reward_{}'.format(state, action, sum(rewards)))
        return state, sum(rewards), done, {}

    def run(self):
        # Main loop.
        for i in range(self._max_episode_steps):
            action = np.random.uniform(-1, 1, 6)
            state, reward, done, _ = self.step(action)
            # print('state_{} \n action_{}, reward_{}'.format(state, action, reward))
            if done:
                break

    def reset(self, is_eval_only=False):
        self.initial_y = None
        self.body_xyz = None
        self.joint_angles = None
        self.frame = 0
        self.episode_reward = 0
        self.joint_exceed_limit = False

        for i in range(100):
            for j in self.motors:
                j.setPosition(0)
            for j in self.legPitchMotor:
                j.setPosition(0)
            self.robot.step(self.timeStep)
        self.robot.simulationResetPhysics()
        self.reset_all_fields()
        for i in range(10):
            self.robot_node.moveViewpoint()
            self.robot.step(self.timeStep)
        if is_eval_only:
            time.sleep(1)
        return self.calc_state()
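
A minimal rollout sketch for the class above, assuming it runs as a Webots supervisor controller in the atlas_change_foot.wbt world: action_dim=6 matches the six leg-pitch motors, and obs_dim=22 matches the 8 "more" values, 12 joint values, and 2 foot-contact values concatenated in calc_state.

if __name__ == '__main__':
    env = Atlas(action_dim=6, obs_dim=22)
    state = env.reset()
    episode_return = 0.0
    while True:
        action = env.action_space.sample()  # random policy, as in Atlas.run()
        state, reward, done, _ = env.step(action)
        episode_return += reward
        if done:
            break
    print('episode return: {:.2f}'.format(episode_return))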
Example 2
import math
import os
import random
import time
from math import pi

import gym
import numpy as np
from controller import Supervisor
from gym import spaces


class P7RLEnv(gym.Env):
    metadata = {'render.modes': ['human']}
    timeStep = 32
    restrictedGoals = True
    boxEnv = False
    isFixedGoal = False
    fixedGoal = [3.5, 0]
    logging = True
    maxTimestep = 20000
    robotResetLocation = [0, 0, 0]

    def __init__(self):
        # Initialize TurtleBot specifics
        self.maxAngSpeed = 1  # 2.84 max
        self.maxLinSpeed = 0.14  # 0.22 max
        # Initialize reward parameters:
        self.moveTowardsParam = 1
        self.moveAwayParam = 1.1
        self.safetyLimit = 0.5
        self.obsProximityParam = 0.01
        self.faceObstacleParam = 10
        # Total reward counter for each component
        self.moveTowardGoalTotalReward = 0
        self.obsProximityTotalPunish = 0
        self.faceObstacleTotalPunish = 0
        # Initialize RL specific variables
        self.rewardInterval = 1  # How often do you want to get rewards
        self.epConc = ''  # Conclusion of episode [Collision, Success, TimeOut]
        self.reward = 0  # Reward gained in the timestep
        self.totalreward = 0  # Reward gained throughout the episode
        self.counter = 0  # Timestep counter
        self.needReset = True  # Whether the robot needs to be reset
        self.state = []  # State of the environment
        self.done = False  # If the episode is done
        self.action = [0, 0]  # Current action chosen
        self.prevAction = [0, 0]  # Past action chosen
        self.goal = []  # Goal
        self.goals = [x / 100 for x in range(-450, 451, 150)]
        self.dist = 0  # Distance to the goal
        self.prevDist = 0  # Previous distance to the goal
        # Logging variables
        self.logDir = ""
        self.path = ""
        self.currentEpisode = 0  # Current episode number
        self.start = 0  # Timer for starting
        self.duration = 0  # Episode duration in seconds
        self.epInfo = []  # Logging info for the episode
        self.startPosition = []
        self.prevMax = 0  # temporary variable
        # Initialize a supervisor
        self.supervisor = Supervisor()
        # Initialize robot
        self.robot = self.supervisor.getFromDef('TB')
        self.translationField = self.robot.getField('translation')
        self.orientationField = self.robot.getField('rotation')
        # Initialize lidar
        self.lidar = self.supervisor.getLidar('TurtleBotLidar')
        self.lidar.enable(self.timeStep)
        self.lidarRanges = []
        # Initialize Motors (instance attribute, so instances don't share the list)
        self.motors = []
        self.motors.append(self.supervisor.getMotor("left wheel motor"))
        self.motors.append(self.supervisor.getMotor("right wheel motor"))
        self.motors[0].setPosition(float("inf"))
        self.motors[1].setPosition(float("inf"))
        self.motors[0].setVelocity(0.0)
        self.motors[1].setVelocity(0.0)
        self.direction = []
        self.position = []
        self.orientation = []
        # Initialize Goal Object
        self.goalObject = self.supervisor.getFromDef('GOAL')
        self.goalTranslationField = self.goalObject.getField('translation')
        # Initialize action-space and observation-space
        self.action_space = spaces.Box(
            low=np.array([-self.maxLinSpeed, -self.maxAngSpeed]),
            high=np.array([self.maxLinSpeed, self.maxAngSpeed]),
            dtype=np.float32)
        self.observation_space = spaces.Box(low=-10,
                                            high=10,
                                            shape=(24, ),
                                            dtype=np.float16)

    def reset(self):
        if not self.logDir:
            self._getLogDirectory()
        self._startEpisode()  # Reset everything that needs resetting
        self.supervisor.step(1)  # Make it Happen
        self.goal = self._setGoal()  # Set Goal
        self._resetObject(self.goalObject, [self.goal[0], 0.01, self.goal[1]])
        self._getState()
        self.startPosition = self.position[:]
        self.state = [
            self.prevAction[0], self.prevAction[1], self.dist, self.direction
        ] + self.lidarRanges  # Set State
        return np.asarray(self.state)

    def step(self, action):
        self.prevAction = self.action[:]  # Set previous action
        self.action = action  # Take action
        self._take_action(action)
        self.supervisor.step(1)
        self.counter += 1
        # Get state (distance/direction to goal + lidar) and convert to a numpy array
        self._getState()  # Observe new state
        self.state = np.asarray([
            self.prevAction[0], self.prevAction[1], self.dist, self.direction
        ] + self.lidarRanges)
        self.reward = self._calculateReward()  # Get reward
        self.done, extraReward = self._isDone()  # Determine if done, plus any extra reward/punishment
        self.reward += extraReward
        self.totalreward += self.reward
        return [self.state, self.reward, self.done, {}]

    def _trimLidarReadings(self, lidar):
        # Replace 0's with 3.5 (the maximum reading)
        new_lidars = [x if x != 0 else 3.5 for x in lidar]
        # Take the minimum reading within each 18-degree sector
        lidarReadings = []
        for x in range(0, 360, 18):
            lidarReadings.append(min(new_lidars[x:x + 18]))
        return lidarReadings
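    # Example of the trimming above: a 360-sample range image collapses to
    # 20 sector minima, so index 0 covers degrees 0-17, index 1 covers
    # degrees 18-35, and so on.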

    def _resetObject(self, obj, coordinates):
        obj.getField('translation').setSFVec3f(coordinates)
        obj.getField('rotation').setSFRotation([0, 1, 0, 0])
        obj.resetPhysics()

    def _setGoal(self):
        if not self.isFixedGoal:
            while True:
                if self.restrictedGoals:
                    gs = random.sample(self.goals, 1)
                    gs.append(random.randrange(-45, 45) / 10)
                    g = gs[:] if np.random.randint(2) == 0 else [gs[1], gs[0]]
                elif self.boxEnv:
                    xGoal = random.randrange(-40, -25) / 10 if np.random.randint(2) == 0 \
                        else random.randrange(25, 40) / 10
                    yGoal = random.randrange(-40, 40) / 10
                    g = [yGoal, xGoal] if np.random.randint(2) == 0 else [xGoal, yGoal]
                else:
                    xGoal = random.randrange(-40, 40) / 10
                    yGoal = random.randrange(-40, 40) / 10
                    g = [xGoal, yGoal]
                pos1 = self.robot.getPosition()[0]
                pos2 = self.robot.getPosition()[2]
                distFromGoal = ((g[0] - pos1)**2 + (g[1] - pos2)**2)**0.5
                if distFromGoal >= 1:
                    break
        else:
            g = self.fixedGoal
        print('Goal set with Coordinates < {}, {} >'.format(g[0], g[1]))
        return g

    def _getDist(self):
        return ((self.goal[0] - self.position[0])**2 +
                (self.goal[1] - self.position[1])**2)**0.5

    def _take_action(self, action):
        if self.logging:
            print(
                '\t\033[1mLinear velocity:\t{}\n\tAngular velocity:\t{}\n\033[0m'
                .format(action[0], action[1]))
        convertedVelocity = self.setVelocities(action[0], action[1])
        self.motors[0].setVelocity(float(convertedVelocity[0]))
        self.motors[1].setVelocity(float(convertedVelocity[1]))

    def _isDone(self):
        if self.counter >= self.maxTimestep:  # Check maximum timestep
            self.needReset = True
            self._endEpisode('timeout')
            return True, 0
        minScan = min(filter(lambda a: a != 0, self.lidarRanges))  # Check LiDAR
        if minScan < 0.25:
            self.needReset = True
            self._endEpisode('collision')
            return True, -500
        elif self.dist < 0.35:  # Check Goal
            self.needReset = True if self.boxEnv else False
            self._endEpisode('success')
            return True, 500
        else:
            return False, 0

    def render(self, mode='human', close=False):
        pass

    def setVelocities(self, linearV, angularV):
        R = 0.033  # Wheel radius (m)
        L = 0.138  # Distance between the wheels (m)
        vr = (2 * linearV + angularV * L) / (2 * R)
        vl = (2 * linearV - angularV * L) / (2 * R)
        #print('\nCommanded linear:\t{} angular:\t {}'.format(linearV, angularV))
        #print('Calculated right wheel:\t{}, left wheel:\t {}'.format(vr, vl))
        return [vl, vr]
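    # Worked example of the conversion above, using the constants from this
    # method: a pure forward command (linearV=0.14, angularV=0) gives
    # vl = vr = 0.14 / 0.033 ~ 4.24 rad/s, while a pure rotation
    # (linearV=0, angularV=1) gives vr = -vl = 0.138 / (2 * 0.033) ~ 2.09 rad/s.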

    def _getDirection(self):
        # Get direction of goal from Robot
        robgoaly = self.goal[1] - self.position[1]
        robgoalx = self.goal[0] - self.position[0]
        goal_angle = math.atan2(robgoalx, robgoaly)
        # Get difference between goal direction and orientation
        heading = goal_angle - self.orientation
        if heading > pi:  # Wrap around pi
            heading -= 2 * pi
        elif heading < -pi:
            heading += 2 * pi
        return heading

    def _calculateReward(self):
        action1 = np.round(self.action[0], 1)  # rounded actions (currently unused)
        action2 = np.round(self.action[1], 1)
        if self.counter % self.rewardInterval != 0:
            return 0  # Check if it's Reward Time
        distRate = self.prevDist - self.dist
        if self.prevDist == 0:
            self.prevDist = self.dist
            return 0

        moveTowardsGoalReward = self._rewardMoveTowardsGoal(distRate)
        self.moveTowardGoalTotalReward += moveTowardsGoalReward

        obsProximityPunish = self._rewardObstacleProximity()
        self.obsProximityTotalPunish += obsProximityPunish

        faceObstaclePunish = -self._rewardFacingObstacle()
        self.faceObstacleTotalPunish += faceObstaclePunish

        reward = moveTowardsGoalReward + obsProximityPunish + faceObstaclePunish

        #self._printMax(moveTowardsGoalReward)
        totalRewardDic = {
            "faceObstacleTotalPunish": self.faceObstacleTotalPunish,
            "obsProximityTotalPunish": self.obsProximityTotalPunish,
            "moveTowardGoalTotalReward": self.moveTowardGoalTotalReward
        }
        rewardDic = {
            "faceObstaclePunish": faceObstaclePunish,
            "obsProximityPunish": obsProximityPunish,
            "moveTowardsGoalReward": moveTowardsGoalReward,
            "Total": reward,
            "\033[1mTotalReward\033[0m": self.totalreward
        }
        if self.logging:
            self._printRewards(rewardDic)
            self._printRewards(totalRewardDic)
        self.prevDist = self.dist
        return reward

    def _rewardFacingObstacle(self):
        rewardFaceObs = 0
        # Check forward lidars if moving forward, backward lidars if moving backward;
        # negative indices wrap around the 20-sector list, so [-2, 2) covers the front.
        lidarRange = [-2, 2] if self.action[0] >= 0 else [7, 11]
        for i in range(lidarRange[0], lidarRange[1]):
            scale = 0.15 if i in [-2, 1, 7, 10] else 0.35  # Side readings get less weight
            if self.lidarRanges[i] < self.safetyLimit:
                rewardFaceObs += scale * (1 - self.lidarRanges[i] / self.safetyLimit)
        return self.faceObstacleParam * rewardFaceObs

    def _rewardMoveTowardsGoal(self, distRate):
        if distRate <= 0:
            return self.moveAwayParam * (distRate / 0.0044)
        else:
            return self.moveTowardsParam * (distRate / 0.0044)

    def _rewardObstacleProximity(self):
        closestObstacle = min(self.lidarRanges)
        if closestObstacle <= self.safetyLimit:
            return -self.obsProximityParam * (1 - closestObstacle / self.safetyLimit)
        else:
            return 0

    def SaveAndQuit(self):
        self._write_file('eps.txt', self.currentEpisode)
        with open(self.path + 'P7_NoObstacles.txt', 'a+') as f:
            print('\nSave and Quit \n')
            f.write(repr(self.epInfo))
            f.write('\n')
            print('\tEpisodes Saved')
        self.supervisor.worldReload()

    def _write_file(self, filename, intval):
        with open(self.path + filename, 'w') as fp:
            print(f'Episodes saved: {intval}')
            fp.write(str(intval))

    def _read_file(self, filename):
        with open(self.path + filename) as fp:
            return int(fp.read())

    def _startEpisode(self):
        if self.start != 0:
            self._logEpisode()
        # Read episode number
        if self.currentEpisode == 0 and os.path.exists(self.path + 'eps.txt'):
            self.currentEpisode = self._read_file('eps.txt')
        else:
            self.currentEpisode += 1
        print('\n\t\t\t\t EPISODE No. {} \n'.format(self.currentEpisode))
        self.start = time.time()
        if self.needReset:  # Reset object(s) / counters / rewards
            print('Resetting Robot...')
            self._resetObject(self.robot, self.robotResetLocation)
        self.counter = 0
        self.totalreward = 0
        self.moveTowardGoalTotalReward = 0
        self.obsProximityTotalPunish = 0
        self.faceObstacleTotalPunish = 0
        self.prevDist = 0

    def _endEpisode(self, ending):
        self.epConc = ending
        prevMode = self.supervisor.simulationGetMode()
        self.supervisor.simulationSetMode(0)
        self.supervisor.exportImage(
            self.logDir + "screenshots/" + ending + "/" +
            str(self.currentEpisode) + ".jpg", 100)
        self.supervisor.simulationSetMode(prevMode)
        print(ending)

    def _getState(self):
        self.lidarRanges = self._trimLidarReadings(self.lidar.getRangeImage())  # Get lidar info
        # if self.logging: self._printLidarRanges()
        self.position = self.robot.getPosition()[::2]  # Get position (x, y) and orientation
        self.orientation = self.orientationField.getSFRotation()[3]
        self.dist = self._getDist()  # Get Euclidean distance from the goal
        self.direction = self._getDirection()

    def _printRewards(self, r, interval=1):
        if self.counter % interval == 0:
            print("\n\033[1mEpisode {}, timestep {}\033[0m".format(
                self.currentEpisode, self.counter))
            for k, v in r.items():
                print("\t" + str(k) + ":\t" + str(v))

    def _printMax(self, reward):
        maxR = reward if self.prevMax < reward else self.prevMax
        self.prevMax = maxR
        print("max: {}".format(maxR))

    def _printLidarRanges(self):
        lidarFormatted = []
        for i in range(len(self.lidarRanges)):
            lidarFormatted.append(str(i) + ': ' + str(self.lidarRanges[i]))
        print(lidarFormatted)

    def _getLogDirectory(self):
        loggingDir = "Logging/"
        latestFile = ""
        latestMod = 0
        for file in os.listdir(loggingDir):
            fileModTime = os.path.getmtime(loggingDir + file)
            if fileModTime > latestMod:
                latestMod = fileModTime
                latestFile = file
        self.logDir = loggingDir + latestFile + "/"
        self.path = self.logDir + "log/"

    def _logEpisode(self):
        self.duration = time.time() - self.start
        self.epInfo.append([
            round(self.duration, 3),
            round(self.totalreward, 3), self.epConc, self.counter, "Goal:",
            self.goal, "Start position:", self.startPosition, "Rewards:",
            self.moveTowardGoalTotalReward, self.obsProximityTotalPunish,
            self.faceObstacleTotalPunish
        ])
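
A minimal driver sketch for the class above, assuming it runs as a Webots supervisor controller in a world containing DEF TB and DEF GOAL nodes, a lidar named TurtleBotLidar, and a Logging/ directory as the logging code expects; the 24-value observation is the previous action (2 values), the distance and direction to the goal, and 20 lidar sector minima.

if __name__ == '__main__':
    env = P7RLEnv()
    obs = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()  # random [linear, angular] velocity command
        obs, reward, done, _ = env.step(action)
    env.SaveAndQuit()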