def step(self, action):
    """Advance the simulation by one control step and apply `action`.

    Steps the Webots supervisor by one `timeStep`, reads the new state,
    converts the raw policy action into an executable motion via `cal.actions`,
    executes it, then evaluates safety and the step reward.

    Args:
        action: raw action from the policy; converted by `cal.actions`
            before execution (exact format defined by the external `cal`
            module — not visible here).

    Returns:
        tuple: (state, uncode_state, r, done, safe) where `state` is the
        processed state, `uncode_state` the unprocessed one from
        `__get_state`, `r` the step reward, `done` the episode-termination
        flag, and `safe` the result of `cal.safetycheck`.
    """
    # advance the physics simulation one control period and count the step
    self.supervisor.step(self.timeStep)
    self.timer += 1
    # read state (both the raw/unencoded and the processed representation)
    uncode_state, self.state = self.__get_state()
    # # record graph
    # self.plt_current_time += self.timeStep * 0.001
    # self.plt_time.append(self.plt_current_time)
    # self.plt_FX.append(self.state[6])
    # self.plt_FY.append(self.state[7])
    # self.plt_FZ.append(self.state[8])
    # self.plt_TX.append(self.state[9])
    # self.plt_TY.append(self.state[10])
    # self.plt_TZ.append(self.state[11])
    # adjust action to usable motion (self.pd presumably toggles PD control
    # inside cal.actions — TODO confirm against the cal module)
    action = cal.actions(self.state, action, self.pd)
    # print('action', action)
    # take actions
    self.__execute_action(action)
    # uncode_state, self.state = self.__get_state()
    # safety check on the state read BEFORE the action was executed —
    # NOTE(review): the post-action re-read above is commented out; confirm
    # whether safety/reward should use the pre- or post-action state.
    safe = cal.safetycheck(self.state)
    # done and reward
    r, done = cal.reward_step(self.state, safe, self.timer)
    return self.state, uncode_state, r, done, safe
def trajectory(self, context, w):
    """Roll out one imagined (model-based) episode and return its return.

    Sets the PD gains from `w`, then simulates up to `MAX_EP_STEPS` steps
    using the learned state-transfer and contact GP models instead of the
    real environment, accumulating a small per-step penalty plus a terminal
    reward/penalty.

    Args:
        context: unused here (kept for interface compatibility with callers).
        w: 2-D array-like of shape (1, 12); columns 0-5 are kp gains,
           columns 6-11 are kd gains.

    Returns:
        float: the accumulated episode reward `ep_reward`.
    """
    # set pd parameters: first six weights are kp, last six kd
    kp = w[:, :6][0]
    kd = w[:, 6:][0]
    self.env.pd_control(kd, kp)
    # Start artificial trajectory from a fixed initial observation:
    # [x, y, z, rx, ry, rz, Fx, Fy, Fz, Tx, Ty, Tz]
    observation = np.array([
        0., -0.327, -53.77, 0., 0., 0., -0.001, 0., -0.604, 0., 0.001, 0.
    ])  # init observation
    ep_reward = 0
    for j in range(self.MAX_EP_STEPS):
        # exploration: Gaussian noise around the policy action, clipped
        action = self.ddpg.select_action(observation)
        action = np.clip(np.random.normal(action, self.var),
                         -self.action_bound, self.action_bound)
        action = cal.actions(observation, action, True)
        # predicted pose delta, then predicted contact forces at the new pose
        ds = self.state_transfer_model.predict(
            np.array([action]), return_std=0, return_cov=0)[0]
        new_pos = ds + observation[:6]
        new_force = self.contact_model.predict(
            np.array([new_pos]), return_std=0, return_cov=0)[0]
        observation_ = np.hstack((new_pos, new_force))
        # judge the safety and done, then calculate the reward
        reward = -0.01  # per-step time penalty
        ep_reward += reward
        # force/torque safety limits exceeded -> terminal penalty shaped by
        # how deep the insertion got (z starts near -53.77)
        if observation_[6] >= 50 or observation_[7] >= 50 or observation_[
                8] >= 200 or observation_[9] >= 3 or observation_[
                    10] >= 3 or observation_[11] >= 3:
            reward = (-1 + (observation_[2] + 52.7) / 40)
            ep_reward += reward
            break
        # success: target depth reached; earlier success earns more
        if observation_[2] > -12:
            reward = 1 - j / self.MAX_EP_STEPS
            ep_reward += reward
            break
        # BUG FIX: propagate the predicted state so the rollout actually
        # advances — the original never updated `observation`, so every
        # iteration replanned from the initial state and the trajectory
        # could never progress toward the termination conditions.
        observation = observation_
    return ep_reward
def step(self, action):
    """Apply one action step through the V-REP remote API and return the result.

    Reads the force sensor and end-effector position, converts `action` via
    `cal.actions`, then drives the robot either in IK mode (Cartesian pose
    signals) or FK mode (per-joint signals), and finally assembles the new
    state vector [position(3), force(3), torque(3)].

    Args:
        action: raw 6-element action; interpretation depends on
            `self.movementMode` (truthy = IK increments, falsy = FK joint
            increments in degrees — presumably, given the pi/180 conversion
            below; TODO confirm units).

    Returns:
        tuple: (code_state(state), state, r, done, safe).
    """
    # set FK or IK mode flag in the simulation
    vrep.simxSetIntegerSignal(self.clientID, "movementMode",
                              self.movementMode, vrep.simx_opmode_oneshot)
    # read force sensor (streaming buffer — assumes streaming was started
    # earlier with simx_opmode_streaming; TODO confirm in the initializer)
    self.errorCode, self.forceState, self.forceVector, self.torqueVector = \
        vrep.simxReadForceSensor(self.clientID, self.force_sensor_handle,
                                 vrep.simx_opmode_buffer)
    self.errorCode, self.position = \
        vrep.simxGetObjectPosition(self.clientID, self.force_sensor_handle,
                                   self.target_handle, vrep.simx_opmode_buffer)
    # calculations
    # adjust action to usable motion
    action = cal.actions(action, self.movementMode)
    # take actions
    if self.movementMode:  # in IK mode
        # do action: accumulate Cartesian pose increments
        self.IK['Pos_x'] += action[0]
        self.IK['Pos_y'] += action[1]
        self.IK['Pos_z'] += action[2]
        # NOTE(review): action[4] -> Alpha and action[3] -> Beta breaks the
        # sequential indexing used everywhere else — looks swapped; confirm
        # this axis mapping is intentional.
        self.IK['Alpha'] += action[4]
        self.IK['Beta'] += action[3]
        self.IK['Gamma'] += action[5]
        # NOTE(review): unlike the FK branch, no boundary clipping is applied
        # here — verify the IK targets cannot drift out of range.
        # send signal
        vrep.simxSetFloatSignal(self.clientID, "pos_X", self.IK['Pos_x'],
                                vrep.simx_opmode_oneshot)
        vrep.simxSetFloatSignal(self.clientID, "pos_Y", self.IK['Pos_y'],
                                vrep.simx_opmode_oneshot)
        vrep.simxSetFloatSignal(self.clientID, "pos_Z", self.IK['Pos_z'],
                                vrep.simx_opmode_oneshot)
        vrep.simxSetFloatSignal(self.clientID, "Alpha", self.IK['Alpha'],
                                vrep.simx_opmode_oneshot)
        vrep.simxSetFloatSignal(self.clientID, "Beta", self.IK['Beta'],
                                vrep.simx_opmode_oneshot)
        vrep.simxSetFloatSignal(self.clientID, "Gamma", self.IK['Gamma'],
                                vrep.simx_opmode_oneshot)
        time.sleep(0.1)  # wait for action to finish
    else:  # in FK mode
        # do action: accumulate per-joint increments
        self.FK['Joint1'] += action[0]
        self.FK['Joint2'] += action[1]
        self.FK['Joint3'] += action[2]
        self.FK['Joint4'] += action[3]
        self.FK['Joint5'] += action[4]
        self.FK['Joint6'] += action[5]
        # boundary: clamp each joint to its configured range
        self.FK['Joint1'] = np.clip(self.FK['Joint1'],
                                    *self.Joint_boundary[0])
        self.FK['Joint2'] = np.clip(self.FK['Joint2'],
                                    *self.Joint_boundary[1])
        self.FK['Joint3'] = np.clip(self.FK['Joint3'],
                                    *self.Joint_boundary[2])
        self.FK['Joint4'] = np.clip(self.FK['Joint4'],
                                    *self.Joint_boundary[3])
        self.FK['Joint5'] = np.clip(self.FK['Joint5'],
                                    *self.Joint_boundary[4])
        self.FK['Joint6'] = np.clip(self.FK['Joint6'],
                                    *self.Joint_boundary[5])
        # send signal: convert degrees to radians, offset/scale by the
        # per-joint (offset, scale) pair in self.Joints — presumably mapping
        # to the simulator's signal units; TODO confirm against the scene.
        vrep.simxSetFloatSignal(
            self.clientID, "Joint1",
            (self.FK['Joint1'] * np.pi / 180 - self.Joints[0][0]) /
            self.Joints[0][1] * 1000, vrep.simx_opmode_oneshot)
        vrep.simxSetFloatSignal(
            self.clientID, "Joint2",
            (self.FK['Joint2'] * np.pi / 180 - self.Joints[1][0]) /
            self.Joints[1][1] * 1000, vrep.simx_opmode_oneshot)
        vrep.simxSetFloatSignal(
            self.clientID, "Joint3",
            (self.FK['Joint3'] * np.pi / 180 - self.Joints[2][0]) /
            self.Joints[2][1] * 1000, vrep.simx_opmode_oneshot)
        vrep.simxSetFloatSignal(
            self.clientID, "Joint4",
            (self.FK['Joint4'] * np.pi / 180 - self.Joints[3][0]) /
            self.Joints[3][1] * 1000, vrep.simx_opmode_oneshot)
        vrep.simxSetFloatSignal(
            self.clientID, "Joint5",
            (self.FK['Joint5'] * np.pi / 180 - self.Joints[4][0]) /
            self.Joints[4][1] * 1000, vrep.simx_opmode_oneshot)
        vrep.simxSetFloatSignal(
            self.clientID, "Joint6",
            (self.FK['Joint6'] * np.pi / 180 - self.Joints[5][0]) /
            self.Joints[5][1] * 1000, vrep.simx_opmode_oneshot)
        # time.sleep(0.01)  # wait for action to finish
    # print(self.position)
    # state: [position(3), force(3), torque(3)]
    self.state = np.concatenate(
        (self.position, self.forceVector, self.torqueVector))
    # done and reward (external helper; name 'reword' is as defined in cal)
    r, done = cal.reword(self.state)
    # safety check
    safe = cal.safetycheck(self.state)
    return self.code_state(self.state), self.state, r, done, safe