Example #1
    def actContinuous(self, exp, action_, bootstrapping=False):
        # Actor should be FIRST here
        # print "Action: " + str(action_)
        # action_ = copy.deepcopy(action_)
        action_ = np.array(action_, dtype='float64')
        (action_, outside_bounds) = clampActionWarn(action_,
                                                    self._action_bounds)

        averageSpeed = exp.getEnvironment().actContinuous(
            action_, bootstrapping=bootstrapping)
        if (self.hasNotFallen(exp)):
            vel_dif = np.abs(self._target_vel - averageSpeed)
            # reward = math.exp((vel_dif*vel_dif)*self._target_vel_weight) # optimal is 0
            if (self._settings["use_parameterized_control"]):
                self.changeParameters()
            reward = reward_smoother(vel_dif, self._settings,
                                     self._target_vel_weight)
            if (self._settings['print_level'] == 'debug'):
                print("target velocity: ", self._target_vel)
                print("velocity diff: ", vel_dif)
                print("reward: ", reward)
        else:
            return 0.0
        self._reward_sum = self._reward_sum + reward
        # print("Reward Sum: ", self._reward_sum)
        return reward
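The reward_smoother helper is not defined in any of these snippets. The commented-out line in Example #1 suggests it replaced an exponential kernel over the squared error, so a minimal sketch, assuming the weight is negative so that zero error yields the maximum reward of 1.0, could look like the following (an illustration only, not the framework's actual implementation):

    import numpy as np

    def reward_smoother(diff, settings, weight):
        # Hypothetical smoother, mirroring the commented-out line in Example #1:
        # an exponential kernel over the squared error. 'weight' is assumed to
        # be negative, so diff == 0 gives 1.0 and larger errors decay toward 0.
        # 'settings' is unused here but kept to match the call sites above.
        return np.exp((diff * diff) * weight)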
Example #2
    def actContinuous(self, exp, action_, bootstrapping=False):
        import characterSim
        # print ("Executing action")
        action_ = np.array(action_, dtype='float64')
        # Actor should be FIRST here
        # print "Action: " + str(action_)
        action = characterSim.Action()
        # samp = paramSampler.generateRandomSample()
        action.setParams(action_)
        reward = exp.getEnvironment().act(action)
        reward = reward_smoother(reward, self._settings, self._target_vel_weight)
        if (not np.isfinite(reward)):
            print("Found some bad reward: ", reward, " for action: ", action_)
            reward = 0.0
        self._reward_sum = self._reward_sum + reward
        return reward
Example #3
    def actContinuous(self, exp, action_, bootstrapping=False):
        # Actor should be FIRST here
        # print "Action: " + str(action_)
        ## Need to make sure this is a vector of doubles
        action_ = np.array(action_, dtype='float64')
        (action_, outside_bounds) = clampActionWarn(action_,
                                                    self._action_bounds)
        position_root = np.array(
            exp.getEnvironment().getActor().getStateEuler()[:3])
        # print ("Relative Right arm pos: ", right_hand_pos-position_root)
        exp.getEnvironment().updateAction(action_)
        steps_ = 0
        vel_error_sum = float(0)
        torque_sum = float(0)
        position_error_sum = float(0)
        pose_error_sum = float(0)
        while (not exp.getEnvironment().needUpdatedAction() or (steps_ == 0)):
            exp.getEnvironment().update()
            simData = exp.getEnvironment().getActor().getSimData()
            position_root = exp.getEnvironment().getActor().getStateEuler()[0:3]
            vel_root = exp.getEnvironment().getActor().getStateEuler()[6:9]
            if (self._settings["print_level"] == 'debug_details'):
                print("avgSpeed: ", simData.avgSpeed, " grabed speed: ",
                      vel_root)
            vel_error_sum += math.fabs(self._target_vel - vel_root[0])
            torque_sum += math.fabs(self._target_torque - simData.avgTorque)

            position_error_sum += math.fabs(self._target_root_height -
                                            position_root[1])

            pose_error_sum += exp.getEnvironment().calcImitationReward()

            steps_ += 1
        averageSpeedError = vel_error_sum / steps_
        averageTorque = torque_sum / steps_
        averagePositionError = position_error_sum / steps_
        averagePoseError = pose_error_sum / steps_

        # averageSpeed = exp.getEnvironment().act(action_)
        # print ("averageSpeed: ", averageSpeed)
        # if (averageSpeed < 0.0):
        #     return 0.0
        # if (exp.getEnvironment().agentHasFallen()):
        #     return 0

        # orientation = exp.getEnvironment().getActor().getStateEuler()[3:][:3]
        # position_root = exp.getEnvironment().getActor().getStateEuler()[0:][:3]
        # print ("Pos: ", position_root)
        # print ("Orientation: ", orientation)
        ## Reward for matching the desired velocity
        vel_diff = averageSpeedError
        if (self._settings["print_level"] == 'debug_details'):
            print("vel_diff: ", vel_diff)
        # if ( self._settings["use_parameterized_control"] ):
        vel_bounds = self._settings['controller_parameter_settings'][
            'velocity_bounds']
        vel_diff = _scale_reward([vel_diff], vel_bounds)[0]
        vel_reward = reward_smoother(vel_diff, self._settings,
                                     self._target_vel_weight)
        if (self._settings["print_level"] == 'debug_details'):
            print("vel_reward: ", vel_reward)
        ## Rewarded for using less torque
        torque_diff = averageTorque
        if (self._settings["print_level"] == 'debug_details'):
            print("torque_diff: ", torque_diff)
        _bounds = self._settings['controller_parameter_settings'][
            'torque_bounds']
        torque_diff = _scale_reward([torque_diff], _bounds)[0]
        torque_reward = reward_smoother(torque_diff, self._settings,
                                        self._target_vel_weight)
        if (self._settings["print_level"] == 'debug_details'):
            print("torque_reward: ", torque_reward)
        ## Reward for keeping the root's y height at the target value
        root_height_diff = (averagePositionError)
        if (self._settings["print_level"] == 'debug_details'):
            print("root_height_diff: ", root_height_diff)
        # if ( self._settings["use_parameterized_control"] ):
        root_height_bounds = self._settings['controller_parameter_settings'][
            'root_height_bounds']
        root_height_diff = _scale_reward([root_height_diff],
                                         root_height_bounds)[0]
        root_height_reward = reward_smoother(root_height_diff, self._settings,
                                             self._target_vel_weight)
        if (self._settings["print_level"] == 'debug_details'):
            print("root_height_reward: ", root_height_reward)

        pose_error = (averagePoseError)
        if (self._settings["print_level"] == 'debug_details'):
            print("pose_error: ", pose_error)
        # if ( self._settings["use_parameterized_control"] ):
        pose_error_bounds = self._settings['controller_parameter_settings'][
            'pose_error_bounds']
        pose_error_diff = _scale_reward([pose_error], pose_error_bounds)[0]
        pose_error_reward = reward_smoother(pose_error_diff, self._settings,
                                            self._target_vel_weight)
        if (self._settings["print_level"] == 'debug_details'):
            print("pose_error_reward: ", pose_error_reward)

        reward = (
            (vel_reward *
             self._settings['controller_reward_weights']['velocity']) +
            (torque_reward *
             self._settings['controller_reward_weights']['torque']) +
            ((root_height_reward) *
             self._settings['controller_reward_weights']['root_height']) +
            ((pose_error_reward) *
             self._settings['controller_reward_weights']['pose_error'])
        )  # optimal is 0
        if (self._settings["print_level"] == 'debug_details'):
            print("Reward: ", reward)

        self._reward_sum = self._reward_sum + reward
        return reward
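_scale_reward is likewise not shown. In Examples #3 and #4 it maps a raw error onto the configured bounds (e.g. velocity_bounds, torque_bounds) before smoothing, so a plausible sketch is a linear rescale of each value over its bounds; the bounds layout and target range below are assumptions, and the real helper may differ (e.g. it might clip out-of-range values):

    import numpy as np

    def _scale_reward(diffs, bounds):
        # Hypothetical normalization: linearly rescale each value from
        # [bounds[0][i], bounds[1][i]] to [0, 1]. The min/max-pair layout of
        # 'bounds' and the [0, 1] target range are assumptions, not taken
        # from the snippets above.
        low = np.array(bounds[0], dtype='float64')
        high = np.array(bounds[1], dtype='float64')
        diffs = np.array(diffs, dtype='float64')
        return (diffs - low) / (high - low)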
Example #4
    def actContinuous(self, exp, action_, bootstrapping=False):
        # Actor should be FIRST here
        # print "Action: " + str(action_)
        ## Need to make sure this is a vector of doubles
        action_ = np.array(action_, dtype='float64')
        (action_, outside_bounds) = clampActionWarn(action_,
                                                    self._action_bounds)
        # right_hand_pos = np.array(exp.getEnvironment().getActor().getLinkPosition("rLowerarm"))
        position_root = np.array(
            exp.getEnvironment().getActor().getStateEuler()[:3])
        # print ("Relative Right arm pos: ", right_hand_pos-position_root)
        exp.getEnvironment().updateAction(action_)
        steps_ = 0
        vel_sum = float(0)
        torque_sum = float(0)
        position_sum = float(0)
        dist_x = float(0)
        while (not exp.getEnvironment().needUpdatedAction() or (steps_ == 0)):
            exp.getEnvironment().update()
            simData = exp.getEnvironment().getActor().getSimData()
            position_root_ = exp.getEnvironment().getActor().getStateEuler()[:3]
            dist_x = dist_x + (position_root_[0] - position_root[0])
            # print ("avgSpeed: ", simData.avgSpeed)
            # I don't calculate the velocity properly, should use
            # character::getCOMVelocity() instead.
            vel_sum += simData.avgSpeed
            torque_sum += math.fabs(self._target_torque - simData.avgTorque)

            # orientation = exp.getEnvironment().getActor().getStateEuler()[3:][:3]

            position_sum += math.fabs(self._target_root_height -
                                      position_root_[1])

            steps_ += 1

        # print("vel_x:", dist_x*30.0)
        averageSpeed = vel_sum / steps_
        averageTorque = torque_sum / steps_
        averagePosition = position_sum / steps_
        # averageSpeed = exp.getEnvironment().act(action_)
        # print ("root position: ", position_root)
        # print ("averageSpeed: ", averageSpeed)
        # if (averageSpeed < 0.0):
        #     return 0.0
        if (exp.getEnvironment().agentHasFallen()):
            return 0

        # orientation = exp.getEnvironment().getActor().getStateEuler()[3:][:3]
        # position_root = exp.getEnvironment().getActor().getStateEuler()[0:][:3]
        # print ("Pos: ", position_root)
        # print ("Orientation: ", orientation)
        ## Reward for matching the desired velocity
        vel_diff = math.fabs(self._target_vel - averageSpeed)
        if (self._settings["print_level"] == 'debug'):
            print("vel_diff: ", vel_diff)
        # if ( self._settings["use_parameterized_control"] ):
        vel_bounds = self._settings['controller_parameter_settings'][
            'velocity_bounds']
        vel_diff = _scale_reward([vel_diff], vel_bounds)[0]
        if (self._settings["print_level"] == 'debug'):
            print("vel_diff: ", vel_diff)
        vel_reward = reward_smoother(vel_diff, self._settings,
                                     self._target_vel_weight)
        ## Rewarded for using less torque
        torque_diff = averageTorque
        _bounds = self._settings['controller_parameter_settings'][
            'torque_bounds']
        torque_diff = _scale_reward([torque_diff], _bounds)[0]
        torque_reward = reward_smoother(torque_diff, self._settings,
                                        self._target_vel_weight)
        ## Reward for keeping the root's y height at the target value
        root_height_diff = (averagePosition)
        if (self._settings["print_level"] == 'debug'):
            print("root_height_diff: ", root_height_diff)
        # if ( self._settings["use_parameterized_control"] ):
        root_height_bounds = self._settings['controller_parameter_settings'][
            'root_height_bounds']
        root_height_diff = _scale_reward([root_height_diff],
                                         root_height_bounds)[0]
        if (self._settings["print_level"] == 'debug'):
            print("root_height_diff: ", root_height_diff)
        root_height_reward = reward_smoother(root_height_diff, self._settings,
                                             self._target_vel_weight)

        # print ("vel reward: ", vel_reward, " torque reward: ", torque_reward )
        reward = (
            (vel_reward *
             self._settings['controller_reward_weights']['velocity'])
            # + (torque_reward * self._settings['controller_reward_weights']['torque']) +
            # (lean_reward * self._settings['controller_reward_weights']['root_pitch']) +
            # + ((root_height_reward) * self._settings['controller_reward_weights']['root_height'])
            # (right_hand_pos_x_reward * self._settings['controller_reward_weights']['right_hand_x_pos'])
        )  # optimal is 0

        self._reward_sum = self._reward_sum + reward
        if (self._settings["use_parameterized_control"]):
            self.changeParameters()
        return reward
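All four variants share the same calling pattern: convert (and usually clamp) the action, step the environment, and return a scalar shaped reward while accumulating it in self._reward_sum. A minimal driver loop built only from the pieces visible above; actor, exp, num_steps, action_dim, and the random placeholder policy are hypothetical stand-ins, not part of these snippets:

    import numpy as np

    # 'actor' and 'exp' stand in for whatever objects the framework provides;
    # they are not constructed in the examples above.
    actor._reward_sum = 0.0
    for _ in range(num_steps):  # num_steps: placeholder episode length
        action = np.random.uniform(-1.0, 1.0, size=action_dim)  # placeholder policy
        reward = actor.actContinuous(exp, action, bootstrapping=False)
    print("Accumulated reward: ", actor._reward_sum)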