Example No. 1
    def actContinuous(self, exp, action_, bootstrapping=False):
        # Actor should be FIRST here
        # print "Action: " + str(action_)
        # action_ = copy.deepcopy(action_)
        action_ = np.array(action_, dtype='float64')
        (action_, outside_bounds) = clampActionWarn(action_,
                                                    self._action_bounds)

        averageSpeed = exp.getEnvironment().actContinuous(
            action_, bootstrapping=bootstrapping)
        if (self.hasNotFallen(exp)):
            vel_dif = np.abs(self._target_vel - averageSpeed)
            # reward = math.exp((vel_dif*vel_dif)*self._target_vel_weight) # optimal is 0
            if (self._settings["use_parameterized_control"]):
                self.changeParameters()
            reward = reward_smoother(vel_dif, self._settings,
                                     self._target_vel_weight)
            if (self._settings['print_level'] == 'debug'):
                print("target velocity: ", self._target_vel)
                print("velocity diff: ", vel_dif)
                print("reward: ", reward)
        else:
            return 0.0
        self._reward_sum = self._reward_sum + reward
        # print("Reward Sum: ", self._reward_sum)
        return reward
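
The commented-out line in this example, math.exp((vel_dif*vel_dif)*self._target_vel_weight), suggests what reward_smoother roughly computes: a reward that peaks at zero velocity error and decays as the error grows. A minimal sketch of such a smoother follows; the name and exact shaping are assumptions, not the library's actual implementation.

    import math

    def gaussian_reward_smoother(error, weight):
        # Hypothetical stand-in for reward_smoother: an error of 0 maps to a
        # reward of 1 and the reward decays exponentially as the error grows.
        # 'weight' is assumed to be negative, as in the commented-out formula.
        return math.exp(weight * (error * error))

    # With weight = -10.0, a perfect velocity match gives 1.0 and an error of
    # 0.5 gives roughly 0.082.
    print(gaussian_reward_smoother(0.0, -10.0))
    print(gaussian_reward_smoother(0.5, -10.0))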
Example No. 2
    def actContinuous(self, exp, action_, bootstrapping=False):
        # Actor should be FIRST here
        # print ("Action: " + str(action_))
        # if (settings["clamp_actions_to_stay_inside_bounds"] or (settings['penalize_actions_outside_bounds'])):
        (action_, outside_bounds) = clampActionWarn(action_,
                                                    self._action_bounds)
        #     if (settings['clamp_actions_to_stay_inside_bounds']):
        #         action_ = action__
        dist = exp.getEnvironment().actContinuous(action_,
                                                  bootstrapping=bootstrapping)
        if (dist > 0):
            self._reward_sum = self._reward_sum + dist
            return dist
        if (self.hasNotFallen(exp)):
            # vel_dif = np.abs(self._target_vel - dist)
            # reward = math.exp((vel_dif*vel_dif)*self._target_vel_weight) # optimal is 0
            # reward = reward_smoother(vel_dif, self._settings, self._target_vel_weight)
            reward = dist
        else:
            return 0.0

        self._reward_sum = self._reward_sum + reward
        return reward
Example No. 3
    def actContinuous(self, exp, action_, bootstrapping=False):
        # Actor should be FIRST here
        # print "Action: " + str(action_)
        ## Need to make sure this is a vector of doubles
        action_ = np.array(action_, dtype='float64')
        (action_, outside_bounds) = clampActionWarn(action_,
                                                    self._action_bounds)
        position_root = np.array(
            exp.getEnvironment().getActor().getStateEuler()[:3])
        # print ("Relative Right arm pos: ", right_hand_pos-position_root)
        exp.getEnvironment().updateAction(action_)
        steps_ = 0
        vel_error_sum = float(0)
        torque_sum = float(0)
        position_error_sum = float(0)
        pose_error_sum = float(0)
        while (not exp.getEnvironment().needUpdatedAction() or (steps_ == 0)):
            exp.getEnvironment().update()
            simData = exp.getEnvironment().getActor().getSimData()
            state_euler = exp.getEnvironment().getActor().getStateEuler()
            position_root = state_euler[0:3]
            vel_root = state_euler[6:9]
            if (self._settings["print_level"] == 'debug_details'):
                print("avgSpeed: ", simData.avgSpeed, " grabed speed: ",
                      vel_root)
            vel_error_sum += math.fabs(self._target_vel - vel_root[0])
            torque_sum += math.fabs(self._target_torque - simData.avgTorque)

            position_error_sum += math.fabs(self._target_root_height -
                                            position_root[1])

            pose_error_sum += exp.getEnvironment().calcImitationReward()

            steps_ += 1
        averageSpeedError = vel_error_sum / steps_
        averageTorque = torque_sum / steps_
        averagePositionError = position_error_sum / steps_
        averagePoseError = pose_error_sum / steps_

        # averageSpeed = exp.getEnvironment().act(action_)
        # print ("averageSpeed: ", averageSpeed)
        # if (averageSpeed < 0.0):
        #     return 0.0
        # if (exp.getEnvironment().agentHasFallen()):
        #     return 0

        # orientation = exp.getEnvironment().getActor().getStateEuler()[3:][:3]
        # position_root = exp.getEnvironment().getActor().getStateEuler()[0:][:3]
        # print ("Pos: ", position_root)
        # print ("Orientation: ", orientation)
        ## Reward for going the desired velocity
        vel_diff = averageSpeedError
        if (self._settings["print_level"] == 'debug_details'):
            print("vel_diff: ", vel_diff)
        # if ( self._settings["use_parameterized_control"] ):
        vel_bounds = self._settings['controller_parameter_settings'][
            'velocity_bounds']
        vel_diff = _scale_reward([vel_diff], vel_bounds)[0]
        vel_reward = reward_smoother(vel_diff, self._settings,
                                     self._target_vel_weight)
        if (self._settings["print_level"] == 'debug_details'):
            print("vel_reward: ", vel_reward)
        ## Rewarded for using less torque
        torque_diff = averageTorque
        if (self._settings["print_level"] == 'debug_details'):
            print("torque_diff: ", torque_diff)
        _bounds = self._settings['controller_parameter_settings'][
            'torque_bounds']
        torque_diff = _scale_reward([torque_diff], _bounds)[0]
        torque_reward = reward_smoother(torque_diff, self._settings,
                                        self._target_vel_weight)
        if (self._settings["print_level"] == 'debug_details'):
            print("torque_reward: ", torque_reward)
        ## Rewarded for keeping the y height of the root at a specific height
        root_height_diff = (averagePositionError)
        if (self._settings["print_level"] == 'debug_details'):
            print("root_height_diff: ", root_height_diff)
        # if ( self._settings["use_parameterized_control"] ):
        root_height_bounds = self._settings['controller_parameter_settings'][
            'root_height_bounds']
        root_height_diff = _scale_reward([root_height_diff],
                                         root_height_bounds)[0]
        root_height_reward = reward_smoother(root_height_diff, self._settings,
                                             self._target_vel_weight)
        if (self._settings["print_level"] == 'debug_details'):
            print("root_height_reward: ", root_height_reward)

        pose_error = (averagePoseError)
        if (self._settings["print_level"] == 'debug_details'):
            print("pose_error: ", pose_error)
        # if ( self._settings["use_parameterized_control"] ):
        pose_error_bounds = self._settings['controller_parameter_settings'][
            'pose_error_bounds']
        pose_error_diff = _scale_reward([pose_error], pose_error_bounds)[0]
        pose_error_reward = reward_smoother(pose_error_diff, self._settings,
                                            self._target_vel_weight)
        if (self._settings["print_level"] == 'debug_details'):
            print("pose_error_reward: ", pose_error_reward)

        reward = (
            (vel_reward *
             self._settings['controller_reward_weights']['velocity']) +
            (torque_reward *
             self._settings['controller_reward_weights']['torque']) +
            ((root_height_reward) *
             self._settings['controller_reward_weights']['root_height']) +
            ((pose_error_reward) *
             self._settings['controller_reward_weights']['pose_error'])
        )  # each error term is optimal at 0
        if (self._settings["print_level"] == 'debug_details'):
            print("Reward: ", reward)

        self._reward_sum = self._reward_sum + reward
        return reward
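
Examples 3 and 5 follow the same pattern for each reward term: average an error over the sub-steps, scale it into configured bounds, pass it through reward_smoother, and combine the smoothed terms with the per-term weights from controller_reward_weights. Below is a hedged sketch of that composition; the [0, 1] normalization attributed to _scale_reward is an assumption, not the codebase's actual behaviour.

    import math

    def scale_to_bounds(value, bounds):
        # Assumed normalization: map 'value' from [bounds[0], bounds[1]] to [0, 1].
        return (value - bounds[0]) / (bounds[1] - bounds[0])

    def combined_reward(errors, bounds, weights, smoother_weight=-2.0):
        # errors, bounds and weights are dicts keyed by term name, e.g.
        # 'velocity', 'torque', 'root_height', 'pose_error'.
        reward = 0.0
        for name, err in errors.items():
            scaled = scale_to_bounds(err, bounds[name])
            smoothed = math.exp(smoother_weight * (scaled * scaled))  # stand-in smoother
            reward += smoothed * weights[name]
        return reward

    # Example with made-up numbers: errors that are small relative to their
    # bounds give a reward close to the sum of the weights.
    errors = {'velocity': 0.3, 'torque': 5.0, 'root_height': 0.05}
    bounds = {'velocity': [0.0, 2.0], 'torque': [0.0, 50.0], 'root_height': [0.0, 0.5]}
    weights = {'velocity': 0.6, 'torque': 0.2, 'root_height': 0.2}
    print(combined_reward(errors, bounds, weights))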
Example No. 4
    def updateAction(self, sim, action_):
        action_ = np.array(action_, dtype='float64')
        (action_, outside_bounds) = clampActionWarn(action_,
                                                    self._action_bounds)
        sim.getEnvironment().updateAction(action_)
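
Every example clamps the action with clampActionWarn before handing it to the environment. The implementation is not part of this listing; a minimal stand-in with the same call shape (a clamped action plus an out-of-bounds flag), assuming bounds of the form [lows, highs], might look like this.

    import numpy as np

    def clamp_action_warn(action, action_bounds):
        # Hypothetical stand-in for clampActionWarn. action_bounds is assumed
        # to be [[low_0, ..., low_n], [high_0, ..., high_n]], matching how
        # self._action_bounds is used in the examples above.
        action = np.array(action, dtype='float64')
        low = np.array(action_bounds[0], dtype='float64')
        high = np.array(action_bounds[1], dtype='float64')
        outside_bounds = bool(np.any(action < low) or np.any(action > high))
        clamped = np.clip(action, low, high)
        return (clamped, outside_bounds)

    # Clamp a 2D action into [-1, 1] per dimension: the first component is
    # clamped to 1.0 and the out-of-bounds flag is True.
    print(clamp_action_warn([1.5, -0.2], [[-1.0, -1.0], [1.0, 1.0]]))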
Example No. 5
    def actContinuous(self, exp, action_, bootstrapping=False):
        # Actor should be FIRST here
        # print "Action: " + str(action_)
        ## Need to make sure this is a vector of doubles
        action_ = np.array(action_, dtype='float64')
        (action_, outside_bounds) = clampActionWarn(action_,
                                                    self._action_bounds)
        # right_hand_pos = np.array(exp.getEnvironment().getActor().getLinkPosition("rLowerarm"))
        position_root = np.array(
            exp.getEnvironment().getActor().getStateEuler()[:3])
        # print ("Relative Right arm pos: ", right_hand_pos-position_root)
        exp.getEnvironment().updateAction(action_)
        steps_ = 0
        vel_sum = float(0)
        torque_sum = float(0)
        pitch_sum = float(0)
        position_sum = float(0)
        right_hand_x_sum = float(0)
        dist_x = float(0)
        while (not exp.getEnvironment().needUpdatedAction() or (steps_ == 0)):
            exp.getEnvironment().update()
            simData = exp.getEnvironment().getActor().getSimData()
            position_root_ = exp.getEnvironment().getActor().getStateEuler()[:3]
            dist_x = dist_x + (position_root_[0] - position_root[0])
            # print ("avgSpeed: ", simData.avgSpeed)
            vel_sum += simData.avgSpeed
            print(
                "I don't calculate the velocity properly, should use character::getCOMVelocity() instead"
            )
            torque_sum += math.fabs(self._target_torque - simData.avgTorque)

            # orientation = exp.getEnvironment().getActor().getStateEuler()[3:][:3]

            position_sum += math.fabs(self._target_root_height -
                                      position_root_[1])

            steps_ += 1

        # print("vel_x:", dist_x*30.0)
        averageSpeed = vel_sum / steps_
        averageTorque = torque_sum / steps_
        averagePosition = position_sum / steps_
        # averageSpeed = exp.getEnvironment().act(action_)
        # print ("root position: ", position_root)
        # print ("averageSpeed: ", averageSpeed)
        # if (averageSpeed < 0.0):
        #     return 0.0
        if (exp.getEnvironment().agentHasFallen()):
            return 0

        # orientation = exp.getEnvironment().getActor().getStateEuler()[3:][:3]
        # position_root = exp.getEnvironment().getActor().getStateEuler()[0:][:3]
        # print ("Pos: ", position_root)
        # print ("Orientation: ", orientation)
        ## Reward for going the desired velocity
        vel_diff = math.fabs(self._target_vel - averageSpeed)
        if (self._settings["print_level"] == 'debug'):
            print("vel_diff: ", vel_diff)
        # if ( self._settings["use_parameterized_control"] ):
        vel_bounds = self._settings['controller_parameter_settings'][
            'velocity_bounds']
        vel_diff = _scale_reward([vel_diff], vel_bounds)[0]
        if (self._settings["print_level"] == 'debug'):
            print("vel_diff: ", vel_diff)
        vel_reward = reward_smoother(vel_diff, self._settings,
                                     self._target_vel_weight)
        ## Rewarded for using less torque
        torque_diff = averageTorque
        _bounds = self._settings['controller_parameter_settings'][
            'torque_bounds']
        torque_diff = _scale_reward([torque_diff], _bounds)[0]
        torque_reward = reward_smoother(torque_diff, self._settings,
                                        self._target_vel_weight)
        ## Rewarded for keeping the y height of the root at a specific height
        root_height_diff = (averagePosition)
        if (self._settings["print_level"] == 'debug'):
            print("root_height_diff: ", root_height_diff)
        # if ( self._settings["use_parameterized_control"] ):
        root_height_bounds = self._settings['controller_parameter_settings'][
            'root_height_bounds']
        root_height_diff = _scale_reward([root_height_diff],
                                         root_height_bounds)[0]
        if (self._settings["print_level"] == 'debug'):
            print("root_height_diff: ", root_height_diff)
        root_height_reward = reward_smoother(root_height_diff, self._settings,
                                             self._target_vel_weight)

        # print ("vel reward: ", vel_reward, " torque reward: ", torque_reward )
        reward = (
            (vel_reward *
             self._settings['controller_reward_weights']['velocity'])
            # + (torque_reward * self._settings['controller_reward_weights']['torque']) +
            # (lean_reward * self._settings['controller_reward_weights']['root_pitch']) +
            # + ((root_height_reward) * self._settings['controller_reward_weights']['root_height'])
            # (right_hand_pos_x_reward * self._settings['controller_reward_weights']['right_hand_x_pos'])
        )  # each error term is optimal at 0

        self._reward_sum = self._reward_sum + reward
        if (self._settings["use_parameterized_control"]):
            self.changeParameters()
        return reward
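
For context, actContinuous is meant to be called once per control step of a rollout. A schematic driver loop is sketched below; apart from actContinuous and agentHasFallen (both used above), the accessor and policy method names are assumptions.

    def run_episode(actor, exp, policy, max_steps=512):
        # Schematic rollout loop: the policy proposes an action for the
        # current state, the actor applies it via actContinuous and the
        # returned reward is accumulated.
        total_reward = 0.0
        for _ in range(max_steps):
            state = exp.getState()                     # assumed accessor
            action = policy.sample(state)              # assumed policy interface
            reward = actor.actContinuous(exp, action, bootstrapping=False)
            total_reward += reward
            if exp.getEnvironment().agentHasFallen():  # used in Example No. 5
                break
        return total_reward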