# NOTE: this module relies on project-level imports assumed to be in scope
# elsewhere in the repository (Env, ArmType, pathDataFolder, CostDDPG,
# StateEstimator, StateEstimatorRegression, StateEstimatorHyb,
# StateEstimatorNoFeedBack, getNoisyCommand, muscleFilter, createVector,
# writeArray, findDataFilename).
import random as rd

import numpy as np


class DDPGEnv(Env):
    """Arm-reaching environment for DDPG: wraps an arm model, a state
    estimator and a trajectory cost, and exposes act/reset/state."""

    print_interval = 20

    def __init__(self, rs, sizeOfTarget, thetafile, arm="Arm26", estim="Inv",
                 actor=None, logDir="Best", saveDir=None):
        self.rs = rs
        self.posIni = np.loadtxt(pathDataFolder + rs.experimentFilePosIni)
        self.arm = ArmType[arm]()
        self.arm.setDT(rs.dt)
        if not rs.det:
            self.arm.setNoise(rs.noise)
        if rs.costClass is None:
            self.trajCost = CostDDPG(rs)
        else:
            self.trajCost = rs.costClass(rs)
        # ensure posIni is always a 2-D array of start points
        if len(self.posIni.shape) == 1:
            self.posIni = self.posIni.reshape((1, self.posIni.shape[0]))
        if estim == "Inv":
            self.stateEstimator = StateEstimator(rs.inputDim, rs.outputDim, rs.delayUKF, self.arm)
        elif estim == "Reg":
            self.stateEstimator = StateEstimatorRegression(rs.inputDim, rs.outputDim, rs.delayUKF, self.arm)
        elif estim == "Hyb":
            self.stateEstimator = StateEstimatorHyb(rs.inputDim, rs.outputDim, rs.delayUKF, self.arm)
        elif estim == "No":
            self.stateEstimator = StateEstimatorNoFeedBack(rs.inputDim, rs.outputDim, rs.delayUKF, self.arm)
        else:
            raise TypeError("This estimator does not exist: " + estim)
        # sizeOfTarget must be set before foldername, which uses it
        self.sizeOfTarget = sizeOfTarget
        if saveDir is None:
            self.foldername = self.rs.OPTIpath + str(self.sizeOfTarget) + "/"
        else:
            self.foldername = saveDir
        self.costStore = []
        self.trajTimeStore = []
        self.lastCoord = []
        self.thetafile = thetafile
        self.actor = actor
        self.saveName = self.foldername + logDir + "/"
        self.nbReset = 0
        self.cost = 0
        self.max = 0
        self.progressTab = [0.1, 0.25, 0.5, 0.75, 1.]
        self.progress = 1
        self.reset()

    def getActionSize(self):
        return self.rs.outputDim

    def getStateSize(self):
        return self.rs.inputDim

    def getActionBounds(self):
        # per-dimension action bounds: all ones, then all zeros
        return [[1] * self.rs.outputDim, [0] * self.rs.outputDim]

    def saveCost(self):
        writeArray(self.costStore, self.foldername + "CostCMAES/", "traj", ".cost")
        writeArray(self.trajTimeStore, self.foldername + "TrajTime/", "traj", ".time")
        writeArray(self.lastCoord, self.foldername + "finalX/", "x", ".last")

    def act(self, action):
        # unwrap the (batched) action and add Gaussian exploration noise
        action = np.array(action[0])
        action = action + np.random.normal(0., 0.2, action.shape)
        return self.__act__(action)

    def __act__(self, action):
        if self.rs.det:
            realU = muscleFilter(action)
            # computation of the arm state
            realNextState = self.arm.computeNextState(realU, self.arm.getState())
            # computation of the approximated state
            tmpState = self.arm.getState()
            estimNextState = realNextState
        else:
            realU = getNoisyCommand(action, self.arm.musclesP.knoiseU)
            realU = muscleFilter(realU)
            # computation of the arm state
            realNextState = self.arm.computeNextState(realU, self.arm.getState())
            # computation of the approximated state
            tmpState = self.arm.getState()
            action = muscleFilter(action)
            estimNextState = self.stateEstimator.getEstimState(tmpState, action)

        self.arm.setState(realNextState)

        # computation of the cost: get dotq and q from the state vector
        _, q = self.arm.getDotQAndQFromStateVector(tmpState)
        self.coordHand = self.arm.mgdEndEffector(q)
        costAct = self.trajCost.computeStateTransitionCost(realU, self.coordHand)

        # advance the step counter and check whether the target is reached
        self.i += 1
        self.t += self.rs.dt
        self.estimState = estimNextState
        if self.coordHand[1] >= self.rs.YTarget or self.i >= self.rs.maxSteps:
            costAct += self.trajCost.computeFinalReward(self.arm, self.t, self.coordHand, self.sizeOfTarget)
        self.cost += costAct
        return [realU], [costAct]

    def actAndStore(self, action):
        if self.rs.det:
            realU = muscleFilter(action)
            # computation of the arm state
            realNextState = self.arm.computeNextState(realU, self.arm.getState())
            # computation of the approximated state
            tmpState = self.arm.getState()
            estimNextState = realNextState
        else:
            realU = getNoisyCommand(action, self.arm.musclesP.knoiseU)
            realU = muscleFilter(realU)
            # computation of the arm state
            realNextState = self.arm.computeNextState(realU, self.arm.getState())
            # computation of the approximated state
            tmpState = self.arm.getState()
            action = muscleFilter(action)
            estimNextState = self.stateEstimator.getEstimState(tmpState, action)

        self.arm.setState(realNextState)

        # get dotq and q from the state vector, then elbow and hand coordinates
        _, q = self.arm.getDotQAndQFromStateVector(tmpState)
        coordElbow, self.coordHand = self.arm.mgdFull(q)
        cost = self.trajCost.computeStateTransitionCost(realU, self.coordHand)

        # log everything about this step: target, states, commands, coordinates
        stepStore = []
        stepStore.append(self.vectarget)
        stepStore.append(self.estimState)
        stepStore.append(tmpState)
        stepStore.append(realU)
        stepStore.append(action)
        stepStore.append(estimNextState)
        stepStore.append(realNextState)
        stepStore.append([coordElbow[0], coordElbow[1]])
        stepStore.append([self.coordHand[0], self.coordHand[1]])
        # flatten the heterogeneous step record into one flat row
        tmpstore = np.array(stepStore).flatten()
        row = [item for sub in tmpstore for item in sub]
        self.dataStore.append(row)

        self.i += 1
        self.t += self.rs.dt
        self.estimState = estimNextState
        return [realU], [cost]

    def state(self):
        return [self.arm.getState()]

    def reset(self, noise=True):
        # alternative start-point strategies are kept as the methods
        # beginOnePoint / beginArcProgress / beginDiscret below
        x, y = self.begin()
        # computes the articular position q1, q2 from the initial coordinates (x, y)
        q1, q2 = self.arm.mgi(x, y)
        # creates the state vector [dotq1, dotq2, q1, q2]
        q = createVector(q1, q2)
        state = np.array([0., 0., q1, q2])
        # computes the coordinates of the hand from the position vector;
        # asserting coordHand == (x, y) does not hold because of rounding effects
        self.coordHand = self.arm.mgdEndEffector(q)
        # initializes parameters for the trajectory
        self.i, self.t, self.cost = 0, 0., 0.
        self.stateEstimator.initStore(state)
        self.arm.setState(state)
        self.estimState = state

    def begin(self):
        return self.beginDiscret()

    def beginOnePoint(self):
        return self.x, self.y

    def beginArcProgress(self):
        # random point on an arc around the target; radius grows with progress
        i = (rd.random() * 6 * np.pi - 9 * np.pi) / 12.
        j = rd.randint(1, self.progress) / 10.
        return self.rs.XTarget + j * np.cos(i), self.rs.YTarget + j * np.sin(i)

    def beginDiscret(self):
        # random start among the fixed initial positions loaded from file
        i = rd.randint(0, len(self.posIni) - 1)
        return self.posIni[i][0], self.posIni[i][1]

    def isFinished(self):
        return self.coordHand[1] >= self.rs.YTarget or self.i >= self.rs.maxSteps

    def OneTraj(self, x, y):
        # computes the articular position q1, q2 from the initial coordinates (x, y)
        q1, q2 = self.arm.mgi(x, y)
        # creates the state vector [dotq1, dotq2, q1, q2]
        q = createVector(q1, q2)
        state = np.array([0., 0., q1, q2])
        self.coordHand = self.arm.mgdEndEffector(q)
        # initializes parameters for the trajectory
        self.i, self.t, self.cost = 0, 0, 0
        self.stateEstimator.initStore(state)
        self.arm.setState(state)
        self.estimState = state
        self.vectarget = [0.0, 0.0, self.rs.XTarget, self.rs.YTarget]
        totalCost = 0
        while not self.isFinished():
            action = self.actor.action(self.estimState)
            _, cost = self.__act__(action)
            totalCost += cost[0]
        totalCost += self.trajCost.computeFinalReward(self.arm, self.t, self.coordHand, self.sizeOfTarget)
        return totalCost, self.t

    def saveOneTraj(self, x, y):
        # computes the articular position q1, q2 from the initial coordinates (x, y)
        q1, q2 = self.arm.mgi(x, y)
        # creates the state vector [dotq1, dotq2, q1, q2]
        q = createVector(q1, q2)
        state = np.array([0., 0., q1, q2])
        self.coordHand = self.arm.mgdEndEffector(q)
        # initializes parameters for the trajectory
        self.i, self.t, self.cost = 0, 0, 0
        self.stateEstimator.initStore(state)
        self.arm.setState(state)
        self.estimState = state
        self.dataStore = []
        self.vectarget = [0.0, 0.0, self.rs.XTarget, self.rs.YTarget]
        totalCost = 0
        while not self.isFinished():
            action = self.actor.action(self.estimState)
            _, cost = self.actAndStore(action)
            totalCost += cost[0]
        totalCost += self.trajCost.computeFinalReward(self.arm, self.t, self.coordHand, self.sizeOfTarget)
        filename = findDataFilename(self.saveName + "Log/", "traj" + str(x) + "-" + str(y), ".log")
        np.savetxt(filename, self.dataStore)
        # used to ignore dispersion when the target line is not crossed
        if self.coordHand[1] >= self.rs.YTarget:
            self.lastCoord.append(self.coordHand[0])
        return totalCost, self.t

    def nTraj(self, xy, repeat):
        # runs `repeat` trajectories from the same start point and averages;
        # takes an (x, y) pair instead of Python 2 tuple-parameter unpacking
        x, y = xy
        costAll, trajTimeAll = np.zeros(repeat), np.zeros(repeat)
        for i in range(repeat):
            costAll[i], trajTimeAll[i] = self.OneTraj(x, y)
        meanCost = np.mean(costAll)
        meanTrajTime = np.mean(trajTimeAll)
        return meanCost, meanTrajTime
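# ---------------------------------------------------------------------------
# Minimal usage sketch (an assumption, not part of the original file): `rs`
# stands in for this project's experiment-settings object and `myActor` for
# any policy exposing action(state); both come from elsewhere in the
# repository. Kept commented out so the module stays importable without them.
#
# if __name__ == '__main__':
#     env = DDPGEnv(rs, sizeOfTarget=0.04, thetafile="theta", actor=myActor)
#     x, y = env.posIni[0][0], env.posIni[0][1]
#     cost, trajTime = env.OneTraj(x, y)           # one rollout from (x, y)
#     meanCost, meanTime = env.nTraj((x, y), 10)   # average over 10 rollouts
#     env.saveOneTraj(x, y)                        # rollout with full logging
#     env.saveCost()                               # dump accumulated results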