def test_second_joint(self):
    # Joint variables
    theta = np.array(np.deg2rad([0, -90, 0, 0, 0, 0]))
    # Calculate forward kinematics
    fk = FK(theta, self.d, self.a, self.alpha)
    # Desired result
    result = np.array([[0, 0, 1, 0.62],
                       [0, -1, 0, 0],
                       [1, 0, 0, 0.995],
                       [0, 0, 0, 1]])
    # Compare numpy arrays
    np.testing.assert_array_almost_equal(
        fk, result, self.decimal, 'Second joint test failed!')
def test_zero_configuration(self):
    # Joint variables
    theta = np.array(np.deg2rad([0, 0, 0, 0, 0, 0]))
    # Calculate forward kinematics
    fk = FK(theta, self.d, self.a, self.alpha)
    # Desired result
    result = np.array([[1, 0, 0, 0.62],
                       [0, -1, 0, 0],
                       [0, 0, -1, -0.195],
                       [0, 0, 0, 1]])
    # Compare numpy arrays
    np.testing.assert_array_almost_equal(
        fk, result, self.decimal, 'Zero configuration test failed!')
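# A minimal sketch of the unittest.TestCase fixture the test methods above assume.
# The DH parameters (self.d, self.a, self.alpha) and precision (self.decimal) below
# are illustrative placeholders, not the values that produce the expected matrices
# above; substitute the arm's real Denavit-Hartenberg table before running.
import unittest
import numpy as np

class TestFK(unittest.TestCase):
    def setUp(self):
        # Hypothetical link offsets, link lengths and twists for a 6-DOF arm
        self.d = np.array([0.2, 0.0, 0.0, 0.3, 0.0, 0.095])
        self.a = np.array([0.0, 0.42, 0.0, 0.0, 0.0, 0.0])
        self.alpha = np.array([np.pi / 2, 0.0, np.pi / 2, -np.pi / 2, np.pi / 2, 0.0])
        self.decimal = 3  # compare matrices to three decimal places

if __name__ == '__main__':
    unittest.main()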
def Viz(q):
    X = FK(q)
    # print(X)
    # Plot the end-effector XY position (translation part of the 4x4 transform)
    plot.plot(X[0, 3], X[1, 3], marker='o')
def Error(q, x_trg):
    # Squared XY distance between the end-effector position given by the
    # forward kinematics of q and the target position x_trg (a column vector)
    X = FK(q)
    return (X[0, 3] - x_trg[0, 0])**2 + (X[1, 3] - x_trg[1, 0])**2
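# A minimal sketch of how Error() could drive a numerical inverse-kinematics
# solve; scipy.optimize.minimize, solve_ik and q0 are assumptions added for
# illustration, not part of the original script.
from scipy.optimize import minimize

def solve_ik(x_trg, q0):
    # Minimise the squared XY position error over the joint vector q
    res = minimize(lambda q: Error(q, x_trg), q0, method='Nelder-Mead')
    return res.x

# Example: reach the point (0.4, 0.2) starting from the zero configuration.
# x_trg is a column vector so x_trg[0, 0] and x_trg[1, 0] index as in Error().
# q_sol = solve_ik(np.array([[0.4], [0.2]]), np.zeros(6))
# Viz(q_sol)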
class CACLA_arm(object):
    # CACLA (Continuous Actor-Critic Learning Automaton) agent that learns joint
    # speeds for a 6-DOF arm so that the end effector reaches a Cartesian goal.
    def __init__(self, base_path):
        self.zeroSpeed = 0.3
        self.basePath = base_path
        self.cacla_args = {'sigma': (0.01, 36.0), 'epsilon': (0.01, 1.0),
                           'alpha': (0.0001, 0.0001), 'beta': (0.00005, 0.00005),
                           'discount_factor': 0.98, 'random_decay': 0.999,
                           'var_beta': 0.0, 'learning_decay': 1.0,
                           'explore_strategy': 1}
        # Network layer sizes, not including the input layer
        self.network = [500, 6]
        self.CACLA = cacla.Cacla(self.basePath, self.network, **self.cacla_args)
        # Rewards
        self.fixedPositive = 1
        self.fixedNegative = -1
        self.goalFound = 200
        self.failed = -200
        self.currentPosition1 = 0.0
        self.currentPosition2 = 180.0
        self.currentPosition3 = 180.0
        self.currentPosition4 = 0.0
        self.currentPosition5 = 0.0
        self.currentPosition6 = 0.0
        self.fk = FK()

    def writeConfig(self, goalPositions):
        f = open(os.path.join(self.basePath, "config"), 'w')
        f.write("Network: " + str(self.network) + '\n')
        f.write("Rewards: " + str(self.fixedPositive) + ", " + str(self.fixedNegative) +
                ", " + str(self.goalFound) + ", " + str(self.failed) + '\n')
        f.write("Goals: " + str(goalPositions))
        f.close()

    def reset(self):
        self.prevDistance = -999  # sentinel init value that cannot occur as a real distance
        self.currentPosition1 = 0.0
        self.currentPosition2 = 180.0
        self.currentPosition3 = 180.0
        self.currentPosition4 = 0.0
        self.currentPosition5 = 0.0
        self.currentPosition6 = 0.0

    def wait(self, waitTime):
        # Busy-wait for waitTime seconds
        beginTime = time.time()
        while True:
            # self.sim.checkPositions()
            curTime = time.time()
            if curTime - beginTime >= waitTime:
                break

    def getPosition(self):
        return (self.currentPosition1, self.currentPosition2, self.currentPosition3,
                self.currentPosition4, self.currentPosition5, self.currentPosition6)

    def getEEF(self):
        return self.fk.getEEFPosition(joint1=self.currentPosition1, joint2=self.currentPosition2,
                                      joint3=self.currentPosition3, joint4=self.currentPosition4,
                                      joint5=self.currentPosition5, joint6=self.currentPosition6)

    def checkJointLimit(self, joint):
        # Wrap the joint angle into the [0, 360) degree range
        joint = 360 + joint if joint < 0.0 else \
            (joint - 360.0 if joint >= 360.0 else joint)
        return joint

    def performAction(self, action, dt):
        action1 = action[0]
        action2 = action[1]
        action3 = action[2]
        action4 = action[3]
        action5 = action[4]
        action6 = action[5]
        # Treat near-zero speeds as zero
        if -self.zeroSpeed < action1 < self.zeroSpeed:
            action1 = 0
        if -self.zeroSpeed < action2 < self.zeroSpeed:
            action2 = 0
        if -self.zeroSpeed < action3 < self.zeroSpeed:
            action3 = 0
        if -self.zeroSpeed < action4 < self.zeroSpeed:
            action4 = 0
        if -self.zeroSpeed < action5 < self.zeroSpeed:
            action5 = 0
        if -self.zeroSpeed < action6 < self.zeroSpeed:
            action6 = 0
        # Integrate the joint speeds over the time step dt
        self.currentPosition1 = self.currentPosition1 + action1 * dt
        self.currentPosition2 = self.currentPosition2 + action2 * dt
        self.currentPosition3 = self.currentPosition3 + action3 * dt
        self.currentPosition4 = self.currentPosition4 + action4 * dt
        self.currentPosition5 = self.currentPosition5 + action5 * dt
        self.currentPosition6 = self.currentPosition6 + action6 * dt
        # Joints 1, 4, 5 and 6 wrap around; joints 2 and 3 are clamped to their limits
        self.currentPosition1 = self.checkJointLimit(self.currentPosition1)
        if self.currentPosition2 < 50.0:
            self.currentPosition2 = 50.0
        if self.currentPosition2 >= 310:
            self.currentPosition2 = 310
        if self.currentPosition3 < 35.0:
            self.currentPosition3 = 35.0
        if self.currentPosition3 >= 325.0:
            self.currentPosition3 = 325.0
        self.currentPosition4 = self.checkJointLimit(self.currentPosition4)
        self.currentPosition5 = self.checkJointLimit(self.currentPosition5)
        self.currentPosition6 = self.checkJointLimit(self.currentPosition6)

    def get_reward(self, state, action):
        (x, y, z) = self.getEEF()
        distance = math.sqrt((state[0] - x)**2 + (state[1] - y)**2 + (state[2] - z)**2)
        if self.prevDistance == -999:
            # First step of the episode: nothing to compare against yet
            self.prevDistance = distance
            return 0, False
        if distance < 0.02:  # distance in m
            distance = 0
        reward = 0
        if distance < self.prevDistance:
            # Getting closer to the goal
            reward = self.fixedPositive
        elif distance >= self.prevDistance:
            # Moving away from the goal, or not moving at all
            reward = self.fixedNegative
        # if distance < 0.2:  # close to the object, so give bigger rewards?
        #     reward *= 10
        if self.prevDistance == 0 and distance == 0 and \
                -self.zeroSpeed < action[0] < self.zeroSpeed and \
                -self.zeroSpeed < action[1] < self.zeroSpeed and \
                -self.zeroSpeed < action[2] < self.zeroSpeed and \
                -self.zeroSpeed < action[3] < self.zeroSpeed and \
                -self.zeroSpeed < action[4] < self.zeroSpeed and \
                -self.zeroSpeed < action[5] < self.zeroSpeed:
            # At the goal and standing still: the episode succeeds
            return self.goalFound, True
        self.prevDistance = distance
        return reward, False

    def createGoals(self):
        # Build a grid of Cartesian goal positions
        stepSize = 0.05
        goalPositions = []
        x = np.arange(-0.2, 0.2 + 0.01, stepSize)
        y = np.arange(-0.3, -0.2 + 0.01, stepSize)
        z = np.arange(0.2, 0.5 + 0.01, stepSize)
        for ix in x:
            for iy in y:
                for iz in z:
                    goalPositions.append([ix, iy, iz])
        print 'Nr of goal positions:', len(goalPositions)
        return goalPositions

    def run(self):
        nr_epochs = 50000
        nr_iterations = 10000
        goalPositions = []
        successCount = 0
        nrEpochsDone = 0
        # joint1g = 90.0
        # joint2g = 250.0
        # joint3g = 250.0
        # x, y, z = self.fk.getEEFPosition(joint1=joint1g, joint2=joint2g, joint3=joint3g)
        # goalPositions = [[x, y, z]]
        # goalPositions = self.createGoals()
        goalPositions = [[0.0, -0.4, 0.3]]
        self.writeConfig(goalPositions)  # write the run settings to the config file
        for epoch in range(0, nr_epochs):
            # np.random.shuffle(goalPositions)
            if epoch % 100 == 0:
                print epoch
            for g in goalPositions:
                nrEpochsDone += 1
                action = []
                terminal = False
                self.reset()  # reset the joints to the start configuration
                for it in range(0, nr_iterations):
                    # State: goal position followed by the normalized joint angles
                    state = []
                    for p in g:
                        state.append(p)
                    joint1, joint2, joint3, joint4, joint5, joint6 = self.getPosition()
                    state.append(joint1 / 360.0)
                    state.append(joint2 / 360.0)
                    state.append(joint3 / 360.0)
                    state.append(joint4 / 360.0)
                    state.append(joint5 / 360.0)
                    state.append(joint6 / 360.0)
                    reward, terminal = self.get_reward(state, action)
                    action = self.CACLA.run(state, reward, terminal)
                    self.performAction(action, 0.02)
                    if terminal:
                        # print 'Total iterations:', it
                        successCount += 1
                        break
                if nrEpochsDone % 100 == 0:
                    print 'Nr. of successes:', successCount, "/", nrEpochsDone, ' = ', \
                        float(successCount) / nrEpochsDone
                if not terminal:
                    # Episode ran out of iterations: feed the failure reward to the learner
                    state = []
                    for p in g:
                        state.append(p)
                    joint1, joint2, joint3, joint4, joint5, joint6 = self.getPosition()
                    state.append(joint1 / 360.0)
                    state.append(joint2 / 360.0)
                    state.append(joint3 / 360.0)
                    state.append(joint4 / 360.0)
                    state.append(joint5 / 360.0)
                    state.append(joint6 / 360.0)
                    reward = self.failed
                    action = self.CACLA.run(state, reward, True)
                self.CACLA.save()
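# A minimal usage sketch, assuming this file is run as a script and that os is
# already imported at the top of the file (writeConfig uses it). The base path
# below is a hypothetical output directory, not one taken from the repository.
if __name__ == '__main__':
    basePath = '/tmp/cacla_arm'  # hypothetical directory for the config and network files
    if not os.path.isdir(basePath):
        os.makedirs(basePath)
    arm = CACLA_arm(basePath)
    arm.run()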