def step(self, action, return_obs=False): self.count += 1 self.controller(action) t = vrep.simxGetLastCmdTime(self.clientID) vrep.simxSynchronousTrigger(self.clientID) while vrep.simxGetLastCmdTime(self.clientID) - t < self.dt: pose = vrep.simxGetObjectPosition(self.clientID, self.body_handle, -1, vrep.simx_opmode_streaming)[1] orientation = vrep.simxGetObjectOrientation( self.clientID, self.body_handle, -1, vrep.simx_opmode_streaming)[1][2] goal_pos = vrep.simxGetObjectPosition( self.clientID, self.brick_handle, self.body_handle, vrep.simx_opmode_streaming)[1] joint_pose = [ vrep.simxGetJointPosition(self.clientID, self.joint_handles[idx], vrep.simx_opmode_streaming)[1] for idx in range(6) ] goal_dist = linalg.norm(goal_pos) state1 = joint_pose + goal_pos if self.goal_dist_prev != None: reward = self.reward(self.state0, goal_dist, action) self.goal_dist_prev = goal_dist self.reward_sum += reward sys.stderr.write( '\rstep:%d| goal:% 2.1f,% 2.1f | pose:% 2.1f,% 2.1f | avg.reward:% 4.2f' % (self.count, self.goal[0], self.goal[1], pose[0], pose[1], self.reward_sum / self.count)) if goal_dist < 0.1: done = 1 print(' | Success') else: done = 0 if self.state0 != None: self.replay.add({ 'state0': self.state0, 'action0': action, 'reward': reward, 'state1': state1, 'done': done }) self.state0 = state1 self.action_prev = action self.goal_dist_prev = goal_dist if return_obs: obs = {'state': state1, 'action': action, 'reward': reward} return state1, obs, done else: return state1, done
def start(self): self.vrep_start() t = vrep.simxGetLastCmdTime(self.clientID) vrep.simxSynchronousTrigger(self.clientID) self.controller([0.0, 0.0]) while vrep.simxGetLastCmdTime(self.clientID) - t < self.dt: lrf_bin = vrep.simxGetStringSignal(self.clientID, 'hokuyo_data', vrep.simx_opmode_streaming)[1] pose = vrep.simxGetObjectPosition(self.clientID, self.body_handle, -1, vrep.simx_opmode_streaming)[1]
def start(self): self.vrep_start() lrf = None while type(lrf)==type(None): vrep.simxSynchronousTrigger(self.clientID) lrf = self.get_measurement() target_pos = vrep.simxGetObjectPosition( self.clientID, self.target_handle, self.pose_handle, vrep.simx_opmode_blocking )[1] self.controller([0.0, 0.0]) target_dist = linalg.norm(target_pos[0:2]) target_angle = arctan2(target_pos[1], target_pos[0]) state = list(lrf)+[0.0, 0.0, target_dist/3.5, target_angle/pi] return state, 0
def step(self, action, return_obs = False): success = False self.count += 1 self.controller(action) lrf = None while type(lrf)==type(None): vrep.simxSynchronousTrigger(self.clientID) lrf = self.get_measurement() pose = vrep.simxGetObjectPosition( self.clientID, self.pose_handle, -1, vrep.simx_opmode_blocking )[1] orientation = vrep.simxGetObjectOrientation( self.clientID, self.pose_handle, -1, vrep.simx_opmode_blocking )[1][2] target_pos = vrep.simxGetObjectPosition( self.clientID, self.target_handle, self.pose_handle, vrep.simx_opmode_blocking )[1] target_dist = linalg.norm(target_pos[0:2]) target_angle = arctan2(target_pos[1], target_pos[0]) state1 = list(lrf)+[action[0]/0.26, action[1]/0.8]+[target_dist/3.5, target_angle/pi] if self.target_dist_prev != None: reward = self.reward(lrf, target_dist, action) self.target_dist_prev = target_dist self.reward_sum += reward sys.stderr.write( '\rstep:%d| target:% 2.1f, % 2.1f | pose:% 2.1f, % 2.1f | avg.reward:% 4.2f' %(self.count, self.target[0], self.target[1], pose[0], pose[1], self.reward_sum/self.count) ) if min(lrf) < 0.03142857: done = 1 success = False print(' | Fail') elif target_dist < 0.2: done = 1 success = True print(' | Success') else: done = 0 if self.state0 != None: self.replay.add( { 'state0':self.state0, 'action0':action, 'reward':reward, 'state1':state1, 'done':done } ) self.state0 = state1 self.action_prev = action self.target_dist_prev = target_dist if return_obs: obs = { 'state':state1, 'lidar':list(lrf), 'action':action, 'pose':[pose[0], pose[1], orientation], 'target':[target_dist, target_angle], 'reward':reward, 'success':success } return state1, obs, done else: return state1, done
def step(self, action, return_obs=False): self.count += 1 self.controller(action) t = vrep.simxGetLastCmdTime(self.clientID) vrep.simxSynchronousTrigger(self.clientID) while vrep.simxGetLastCmdTime(self.clientID) - t < self.dt: pose = vrep.simxGetObjectPosition(self.clientID, self.body_handle, -1, vrep.simx_opmode_streaming)[1] orientation = vrep.simxGetObjectOrientation( self.clientID, self.body_handle, -1, vrep.simx_opmode_streaming)[1][2] goal_pos = vrep.simxGetObjectPosition( self.clientID, self.goal_handle, self.body_handle, vrep.simx_opmode_streaming)[1][1:3] lrf_bin = vrep.simxGetStringSignal(self.clientID, 'hokuyo_data', vrep.simx_opmode_streaming)[1] lrf = array(vrep.simxUnpackFloats(lrf_bin), dtype=float) / 5.578 goal_dist = linalg.norm(goal_pos) goal_angle = arctan2(-goal_pos[0], goal_pos[1]) state1 = list(lrf) + [action[0] * 2, action[1]] state1 += [goal_dist/5.578, goal_angle/pi] \ if goal_dist<5.578 else \ [1, goal_angle/pi] if self.goal_dist_prev != None: reward = self.reward(lrf, goal_dist, action) self.goal_dist_prev = goal_dist self.reward_sum += reward sys.stderr.write( '\rstep:%d| goal:% 2.1f, % 2.1f | pose:% 2.1f, % 2.1f | avg.reward:% 4.2f' % (self.count, self.goal[0], self.goal[1], pose[0], pose[1], self.reward_sum / self.count)) if min(lrf) < 0.0358: done = 1 print(' | Fail') elif goal_dist < 0.1: done = 1 print(' | Success') else: done = 0 if self.state0 != None: self.replay.add({ 'state0': self.state0, 'action0': action, 'reward': reward, 'state1': state1, 'done': done }) self.state0 = state1 self.action_prev = action self.goal_dist_prev = goal_dist if return_obs: obs = { 'state': state1, 'lidar': list(lrf), 'action': action, 'pose': [pose[0], pose[1], orientation], 'goal': [goal_dist, goal_angle], 'reward': reward, 'time': t + self.dt } return state1, obs, done else: return state1, done
from environment.vrep_env import Env from environment.env_modules import vrep import time import numpy as np env = Env(19998) # for i in range(1): # env.reset() # vrep.simxStopSimulation(env.clientID, vrep.simx_opmode_oneshot) # print('--------------------------------------') # env.reset() # vrep.simxPauseSimulation(env.clientID, vrep.simx_opmode_oneshot) vrep.simxSynchronous(env.clientID, True) time.sleep(0.5) vrep.simxStartSimulation(env.clientID, vrep.simx_opmode_oneshot) for i in range(5000): vrep.simxSynchronousTrigger(env.clientID) print( vrep.simxGetObjectPosition(env.clientID, env.target_handle, env.ego_handle, vrep.simx_opmode_blocking)[1])