예제 #1
0
 def step(self, action, return_obs=False):
     self.count += 1
     self.controller(action)
     t = vrep.simxGetLastCmdTime(self.clientID)
     vrep.simxSynchronousTrigger(self.clientID)
     while vrep.simxGetLastCmdTime(self.clientID) - t < self.dt:
         pose = vrep.simxGetObjectPosition(self.clientID, self.body_handle,
                                           -1,
                                           vrep.simx_opmode_streaming)[1]
         orientation = vrep.simxGetObjectOrientation(
             self.clientID, self.body_handle, -1,
             vrep.simx_opmode_streaming)[1][2]
         goal_pos = vrep.simxGetObjectPosition(
             self.clientID, self.brick_handle, self.body_handle,
             vrep.simx_opmode_streaming)[1]
         joint_pose = [
             vrep.simxGetJointPosition(self.clientID,
                                       self.joint_handles[idx],
                                       vrep.simx_opmode_streaming)[1]
             for idx in range(6)
         ]
     goal_dist = linalg.norm(goal_pos)
     state1 = joint_pose + goal_pos
     if self.goal_dist_prev != None:
         reward = self.reward(self.state0, goal_dist, action)
         self.goal_dist_prev = goal_dist
         self.reward_sum += reward
     sys.stderr.write(
         '\rstep:%d| goal:% 2.1f,% 2.1f | pose:% 2.1f,% 2.1f | avg.reward:% 4.2f'
         % (self.count, self.goal[0], self.goal[1], pose[0], pose[1],
            self.reward_sum / self.count))
     if goal_dist < 0.1:
         done = 1
         print(' | Success')
     else:
         done = 0
     if self.state0 != None:
         self.replay.add({
             'state0': self.state0,
             'action0': action,
             'reward': reward,
             'state1': state1,
             'done': done
         })
     self.state0 = state1
     self.action_prev = action
     self.goal_dist_prev = goal_dist
     if return_obs:
         obs = {'state': state1, 'action': action, 'reward': reward}
         return state1, obs, done
     else:
         return state1, done
예제 #2
0
 def start(self):
     self.vrep_start()
     t = vrep.simxGetLastCmdTime(self.clientID)
     vrep.simxSynchronousTrigger(self.clientID)
     self.controller([0.0, 0.0])
     while vrep.simxGetLastCmdTime(self.clientID) - t < self.dt:
         lrf_bin = vrep.simxGetStringSignal(self.clientID, 'hokuyo_data',
                                            vrep.simx_opmode_streaming)[1]
         pose = vrep.simxGetObjectPosition(self.clientID, self.body_handle,
                                           -1,
                                           vrep.simx_opmode_streaming)[1]
예제 #3
0
 def start(self):
     self.vrep_start()
     lrf = None
     while type(lrf)==type(None):
         vrep.simxSynchronousTrigger(self.clientID)
         lrf = self.get_measurement()
     target_pos = vrep.simxGetObjectPosition(
         self.clientID,
         self.target_handle,
         self.pose_handle,
         vrep.simx_opmode_blocking
     )[1]
     self.controller([0.0, 0.0])
     target_dist = linalg.norm(target_pos[0:2])
     target_angle = arctan2(target_pos[1], target_pos[0])
     state = list(lrf)+[0.0, 0.0, target_dist/3.5, target_angle/pi]
     return state, 0
예제 #4
0
 def step(self, action, return_obs = False):
     success = False
     self.count += 1
     self.controller(action)
     lrf = None
     while type(lrf)==type(None):
         vrep.simxSynchronousTrigger(self.clientID)
         lrf = self.get_measurement()
     pose = vrep.simxGetObjectPosition(
         self.clientID,
         self.pose_handle,
         -1,
         vrep.simx_opmode_blocking
     )[1]
     orientation = vrep.simxGetObjectOrientation(
         self.clientID,
         self.pose_handle,
         -1,
         vrep.simx_opmode_blocking
     )[1][2]
     target_pos = vrep.simxGetObjectPosition(
         self.clientID,
         self.target_handle,
         self.pose_handle,
         vrep.simx_opmode_blocking
     )[1]
     target_dist = linalg.norm(target_pos[0:2])
     target_angle = arctan2(target_pos[1], target_pos[0])
     state1 = list(lrf)+[action[0]/0.26, action[1]/0.8]+[target_dist/3.5, target_angle/pi]
     if self.target_dist_prev != None:
         reward = self.reward(lrf, target_dist, action)
         self.target_dist_prev = target_dist
         self.reward_sum += reward
     sys.stderr.write(
         '\rstep:%d| target:% 2.1f, % 2.1f | pose:% 2.1f, % 2.1f | avg.reward:% 4.2f'
         %(self.count, self.target[0], self.target[1], pose[0], pose[1], self.reward_sum/self.count)
     )
     if min(lrf) < 0.03142857:
         done = 1
         success = False
         print(' | Fail')
     elif target_dist < 0.2:
         done = 1
         success = True
         print(' | Success')
     else:
         done = 0
     if self.state0 != None:
         self.replay.add(
             {
                 'state0':self.state0,
                 'action0':action,
                 'reward':reward,
                 'state1':state1,
                 'done':done
             }
         )
     self.state0 = state1
     self.action_prev = action
     self.target_dist_prev = target_dist
     if return_obs:
         obs = {
             'state':state1, 
             'lidar':list(lrf), 
             'action':action, 
             'pose':[pose[0], pose[1], orientation], 
             'target':[target_dist, target_angle], 
             'reward':reward,
             'success':success
         }
         return state1, obs, done
     else:
         return state1, done
 def step(self, action, return_obs=False):
     self.count += 1
     self.controller(action)
     t = vrep.simxGetLastCmdTime(self.clientID)
     vrep.simxSynchronousTrigger(self.clientID)
     while vrep.simxGetLastCmdTime(self.clientID) - t < self.dt:
         pose = vrep.simxGetObjectPosition(self.clientID, self.body_handle,
                                           -1,
                                           vrep.simx_opmode_streaming)[1]
         orientation = vrep.simxGetObjectOrientation(
             self.clientID, self.body_handle, -1,
             vrep.simx_opmode_streaming)[1][2]
         goal_pos = vrep.simxGetObjectPosition(
             self.clientID, self.goal_handle, self.body_handle,
             vrep.simx_opmode_streaming)[1][1:3]
         lrf_bin = vrep.simxGetStringSignal(self.clientID, 'hokuyo_data',
                                            vrep.simx_opmode_streaming)[1]
         lrf = array(vrep.simxUnpackFloats(lrf_bin), dtype=float) / 5.578
     goal_dist = linalg.norm(goal_pos)
     goal_angle = arctan2(-goal_pos[0], goal_pos[1])
     state1 = list(lrf) + [action[0] * 2, action[1]]
     state1 += [goal_dist/5.578, goal_angle/pi] \
                   if goal_dist<5.578 else \
               [1, goal_angle/pi]
     if self.goal_dist_prev != None:
         reward = self.reward(lrf, goal_dist, action)
         self.goal_dist_prev = goal_dist
         self.reward_sum += reward
     sys.stderr.write(
         '\rstep:%d| goal:% 2.1f, % 2.1f | pose:% 2.1f, % 2.1f | avg.reward:% 4.2f'
         % (self.count, self.goal[0], self.goal[1], pose[0], pose[1],
            self.reward_sum / self.count))
     if min(lrf) < 0.0358:
         done = 1
         print(' | Fail')
     elif goal_dist < 0.1:
         done = 1
         print(' | Success')
     else:
         done = 0
     if self.state0 != None:
         self.replay.add({
             'state0': self.state0,
             'action0': action,
             'reward': reward,
             'state1': state1,
             'done': done
         })
     self.state0 = state1
     self.action_prev = action
     self.goal_dist_prev = goal_dist
     if return_obs:
         obs = {
             'state': state1,
             'lidar': list(lrf),
             'action': action,
             'pose': [pose[0], pose[1], orientation],
             'goal': [goal_dist, goal_angle],
             'reward': reward,
             'time': t + self.dt
         }
         return state1, obs, done
     else:
         return state1, done
예제 #6
0
from environment.vrep_env import Env
from environment.env_modules import vrep
import time
import numpy as np
env = Env(19998)
# for i in range(1):
#     env.reset()
#     vrep.simxStopSimulation(env.clientID, vrep.simx_opmode_oneshot)
#     print('--------------------------------------')
# env.reset()
# vrep.simxPauseSimulation(env.clientID, vrep.simx_opmode_oneshot)

vrep.simxSynchronous(env.clientID, True)
time.sleep(0.5)
vrep.simxStartSimulation(env.clientID, vrep.simx_opmode_oneshot)
for i in range(5000):
    vrep.simxSynchronousTrigger(env.clientID)

    print(
        vrep.simxGetObjectPosition(env.clientID, env.target_handle,
                                   env.ego_handle,
                                   vrep.simx_opmode_blocking)[1])