def actContinuous(self, exp, action_, bootstrapping=False):
    ## The actor must act FIRST here.
    ## Make sure the action is a vector of doubles.
    action_ = np.array(action_, dtype='float64')
    (action_, outside_bounds) = clampActionWarn(action_, self._action_bounds)
    averageSpeed = exp.getEnvironment().actContinuous(
        action_, bootstrapping=bootstrapping)
    if (self.hasNotFallen(exp)):
        vel_dif = np.abs(self._target_vel - averageSpeed)
        if (self._settings["use_parameterized_control"]):
            self.changeParameters()
        ## Smoothed velocity-tracking reward; optimal at vel_dif == 0.
        # reward = math.exp((vel_dif*vel_dif)*self._target_vel_weight)
        reward = reward_smoother(vel_dif, self._settings,
                                 self._target_vel_weight)
        if (self._settings['print_level'] == 'debug'):
            print("target velocity: ", self._target_vel)
            print("velocity diff: ", vel_dif)
            print("reward: ", reward)
    else:
        ## The character fell: no reward for this step.
        return 0.0
    self._reward_sum = self._reward_sum + reward
    return reward
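## For reference, a minimal sketch of the shaping that reward_smoother is
## assumed to apply here, based on the commented-out expression above: an
## exponential kernel that maps an absolute error into (0, 1], peaking at
## zero error when the weight is negative. This is an illustrative
## assumption, not the framework's actual implementation; it carries a
## _sketch suffix so it does not shadow the real helper.
def _reward_smoother_sketch(diff, settings, weight):
    import math
    ## settings is accepted only to mirror the call signature used above.
    ## exp(weight * diff^2) equals 1.0 at diff == 0 and decays toward 0 as
    ## the error grows, assuming weight < 0.
    return math.exp((diff * diff) * weight)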
def actContinuous(self, exp, action_, bootstrapping=False):
    import characterSim
    ## The actor must act FIRST here.
    ## Make sure the action is a vector of doubles.
    action_ = np.array(action_, dtype='float64')
    action = characterSim.Action()
    action.setParams(action_)
    reward = exp.getEnvironment().act(action)
    reward = reward_smoother(reward, self._settings, self._target_vel_weight)
    if (not np.isfinite(reward)):
        print("Found some bad reward: ", reward, " for action: ", action_)
        reward = 0
    self._reward_sum = self._reward_sum + reward
    return reward
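## For reference, a minimal sketch of the clampActionWarn helper used by the
## surrounding variants. It assumes self._action_bounds has the layout
## [[min_0, ...], [max_0, ...]] and that the helper clips each action
## dimension to its bounds and reports whether any clipping occurred. The
## framework's real helper may also log a warning; this is illustrative only.
def _clamp_action_warn_sketch(action, bounds):
    import numpy as np
    low = np.array(bounds[0], dtype='float64')
    high = np.array(bounds[1], dtype='float64')
    clamped = np.clip(action, low, high)
    ## True if any dimension had to be clipped back inside its bounds.
    outside_bounds = bool(np.any(clamped != action))
    return (clamped, outside_bounds)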
def actContinuous(self, exp, action_, bootstrapping=False):
    ## The actor must act FIRST here.
    ## Make sure the action is a vector of doubles.
    action_ = np.array(action_, dtype='float64')
    (action_, outside_bounds) = clampActionWarn(action_, self._action_bounds)
    position_root = np.array(
        exp.getEnvironment().getActor().getStateEuler()[0:3])
    exp.getEnvironment().updateAction(action_)
    ## Step the simulation until a new action is needed, accumulating the
    ## tracking errors the reward terms are computed from.
    steps_ = 0
    vel_error_sum = float(0)
    torque_sum = float(0)
    position_error_sum = float(0)
    pose_error_sum = float(0)
    while (not exp.getEnvironment().needUpdatedAction() or (steps_ == 0)):
        exp.getEnvironment().update()
        simData = exp.getEnvironment().getActor().getSimData()
        position_root = exp.getEnvironment().getActor().getStateEuler()[0:3]
        vel_root = exp.getEnvironment().getActor().getStateEuler()[6:9]
        if (self._settings["print_level"] == 'debug_details'):
            print("avgSpeed: ", simData.avgSpeed, " grabbed speed: ", vel_root)
        vel_error_sum += math.fabs(self._target_vel - vel_root[0])
        torque_sum += math.fabs(self._target_torque - simData.avgTorque)
        position_error_sum += math.fabs(self._target_root_height -
                                        position_root[1])
        pose_error_sum += exp.getEnvironment().calcImitationReward()
        steps_ += 1
    averageSpeedError = vel_error_sum / steps_
    averageTorque = torque_sum / steps_
    averagePositionError = position_error_sum / steps_
    averagePoseError = pose_error_sum / steps_

    ## Reward for tracking the desired velocity.
    vel_diff = averageSpeedError
    if (self._settings["print_level"] == 'debug_details'):
        print("vel_diff: ", vel_diff)
    vel_bounds = self._settings['controller_parameter_settings'][
        'velocity_bounds']
    vel_diff = _scale_reward([vel_diff], vel_bounds)[0]
    vel_reward = reward_smoother(vel_diff, self._settings,
                                 self._target_vel_weight)
    if (self._settings["print_level"] == 'debug_details'):
        print("vel_reward: ", vel_reward)

    ## Reward for using less torque.
    torque_diff = averageTorque
    if (self._settings["print_level"] == 'debug_details'):
        print("torque_diff: ", torque_diff)
    _bounds = self._settings['controller_parameter_settings'][
        'torque_bounds']
    torque_diff = _scale_reward([torque_diff], _bounds)[0]
    torque_reward = reward_smoother(torque_diff, self._settings,
                                    self._target_vel_weight)
    if (self._settings["print_level"] == 'debug_details'):
        print("torque_reward: ", torque_reward)

    ## Reward for keeping the y height of the root at a specific height.
    root_height_diff = averagePositionError
    if (self._settings["print_level"] == 'debug_details'):
        print("root_height_diff: ", root_height_diff)
    root_height_bounds = self._settings['controller_parameter_settings'][
        'root_height_bounds']
    root_height_diff = _scale_reward([root_height_diff],
                                     root_height_bounds)[0]
    root_height_reward = reward_smoother(root_height_diff, self._settings,
                                         self._target_vel_weight)
    if (self._settings["print_level"] == 'debug_details'):
        print("root_height_reward: ", root_height_reward)

    ## Reward for imitating the reference pose.
    pose_error = averagePoseError
    if (self._settings["print_level"] == 'debug_details'):
        print("pose_error: ", pose_error)
    pose_error_bounds = self._settings['controller_parameter_settings'][
        'pose_error_bounds']
    pose_error_diff = _scale_reward([pose_error], pose_error_bounds)[0]
    pose_error_reward = reward_smoother(pose_error_diff, self._settings,
                                        self._target_vel_weight)
    if (self._settings["print_level"] == 'debug_details'):
        print("pose_error_reward: ", pose_error_reward)

    ## Weighted sum of the reward terms; each term is optimal at 0 error.
    reward = ((vel_reward *
               self._settings['controller_reward_weights']['velocity']) +
              (torque_reward *
               self._settings['controller_reward_weights']['torque']) +
              (root_height_reward *
               self._settings['controller_reward_weights']['root_height']) +
              (pose_error_reward *
               self._settings['controller_reward_weights']['pose_error']))
    if (self._settings["print_level"] == 'debug_details'):
        print("Reward: ", reward)
    self._reward_sum = self._reward_sum + reward
    return reward
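## For reference, a minimal sketch of how the _scale_reward helper is assumed
## to behave above: normalize each error by the span of its parameter bounds
## so that velocity, torque, root-height, and pose errors share a comparable
## scale before being passed through reward_smoother. The bounds layout
## [[min, ...], [max, ...]] is an assumption, and this is not the framework's
## actual implementation.
def _scale_reward_sketch(diffs, bounds):
    import numpy as np
    low = np.array(bounds[0], dtype='float64')
    high = np.array(bounds[1], dtype='float64')
    ## An error equal to the full bound span maps to 1.0.
    return list(np.array(diffs, dtype='float64') / (high - low))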
def actContinuous(self, exp, action_, bootstrapping=False):
    ## The actor must act FIRST here.
    ## Make sure the action is a vector of doubles.
    action_ = np.array(action_, dtype='float64')
    (action_, outside_bounds) = clampActionWarn(action_, self._action_bounds)
    # right_hand_pos = np.array(exp.getEnvironment().getActor().getLinkPosition("rLowerarm"))
    position_root = np.array(
        exp.getEnvironment().getActor().getStateEuler()[0:3])
    exp.getEnvironment().updateAction(action_)
    ## Step the simulation until a new action is needed, accumulating the
    ## quantities the reward terms are computed from.
    steps_ = 0
    vel_sum = float(0)
    torque_sum = float(0)
    pitch_sum = float(0)
    position_sum = float(0)
    right_hand_x_sum = float(0)
    dist_x = float(0)
    while (not exp.getEnvironment().needUpdatedAction() or (steps_ == 0)):
        exp.getEnvironment().update()
        simData = exp.getEnvironment().getActor().getSimData()
        position_root_ = exp.getEnvironment().getActor().getStateEuler()[0:3]
        dist_x = dist_x + (position_root_[0] - position_root[0])
        ## TODO: this does not measure the velocity properly; it should use
        ## character::getCOMVelocity() instead.
        vel_sum += simData.avgSpeed
        torque_sum += math.fabs(self._target_torque - simData.avgTorque)
        position_sum += math.fabs(self._target_root_height -
                                  position_root_[1])
        steps_ += 1
    averageSpeed = vel_sum / steps_
    averageTorque = torque_sum / steps_
    averagePosition = position_sum / steps_
    if (exp.getEnvironment().agentHasFallen()):
        ## The character fell: no reward for this step.
        return 0

    ## Reward for going the desired velocity.
    vel_diff = math.fabs(self._target_vel - averageSpeed)
    if (self._settings["print_level"] == 'debug'):
        print("vel_diff: ", vel_diff)
    vel_bounds = self._settings['controller_parameter_settings'][
        'velocity_bounds']
    vel_diff = _scale_reward([vel_diff], vel_bounds)[0]
    if (self._settings["print_level"] == 'debug'):
        print("scaled vel_diff: ", vel_diff)
    vel_reward = reward_smoother(vel_diff, self._settings,
                                 self._target_vel_weight)

    ## Reward for using less torque.
    torque_diff = averageTorque
    _bounds = self._settings['controller_parameter_settings'][
        'torque_bounds']
    torque_diff = _scale_reward([torque_diff], _bounds)[0]
    torque_reward = reward_smoother(torque_diff, self._settings,
                                    self._target_vel_weight)

    ## Reward for keeping the y height of the root at a specific height.
    root_height_diff = averagePosition
    if (self._settings["print_level"] == 'debug'):
        print("root_height_diff: ", root_height_diff)
    root_height_bounds = self._settings['controller_parameter_settings'][
        'root_height_bounds']
    root_height_diff = _scale_reward([root_height_diff],
                                     root_height_bounds)[0]
    if (self._settings["print_level"] == 'debug'):
        print("scaled root_height_diff: ", root_height_diff)
    root_height_reward = reward_smoother(root_height_diff, self._settings,
                                         self._target_vel_weight)

    ## Weighted sum of the active reward terms; each term is optimal at 0
    ## error. The root_pitch and right_hand_x_pos terms are left disabled,
    ## as in the original.
    reward = ((vel_reward *
               self._settings['controller_reward_weights']['velocity']) +
              (torque_reward *
               self._settings['controller_reward_weights']['torque']) +
              # (lean_reward * self._settings['controller_reward_weights']['root_pitch']) +
              (root_height_reward *
               self._settings['controller_reward_weights']['root_height'])
              # (right_hand_pos_x_reward * self._settings['controller_reward_weights']['right_hand_x_pos'])
              )
    self._reward_sum = self._reward_sum + reward
    if (self._settings["use_parameterized_control"]):
        self.changeParameters()
    return reward
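## Hypothetical usage sketch of how one of the actContinuous variants above
## is assumed to be driven from an episode loop. `experience`, `policy`, and
## `actor` (and endOfEpoch) are stand-ins for objects created elsewhere in
## the framework, not names defined in this file.
# state = experience.getEnvironment().getState()
# reward_sum = 0.0
# while (not experience.getEnvironment().endOfEpoch()):
#     action = policy.predict(state)
#     reward = actor.actContinuous(experience, action, bootstrapping=False)
#     state = experience.getEnvironment().getState()
#     reward_sum = reward_sum + reward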