Example #1
    def __init__(self,
                 rs,
                 sizeOfTarget,
                 thetafile,
                 arm="Arm26",
                 estim="Inv",
                 actor=None,
                 logDir="Best",
                 saveDir=None):
        self.rs = rs
        self.posIni = np.loadtxt(pathDataFolder + rs.experimentFilePosIni)
        self.arm = ArmType[arm]()
        self.arm.setDT(rs.dt)
        if (not self.rs.det):
            self.arm.setNoise(rs.noise)
        if (rs.costClass is None):
            self.trajCost = CostDDPG(rs)
        else:
            self.trajCost = rs.costClass(rs)
        if (len(self.posIni.shape) == 1):
            self.posIni = self.posIni.reshape((1, self.posIni.shape[0]))
        if estim == "Inv":
            self.stateEstimator = StateEstimator(rs.inputDim, rs.outputDim,
                                                 rs.delayUKF, self.arm)
        elif estim == "Reg":
            self.stateEstimator = StateEstimatorRegression(
                rs.inputDim, rs.outputDim, rs.delayUKF, self.arm)
        elif estim == "Hyb":
            self.stateEstimator = StateEstimatorHyb(rs.inputDim, rs.outputDim,
                                                    rs.delayUKF, self.arm)
        elif estim == "No":
            self.stateEstimator = StateEstimatorNoFeedBack(
                rs.inputDim, rs.outputDim, rs.delayUKF, self.arm)
        else:
            raise TypeError("Unknown estimator: " + estim)

        if (saveDir is None):
            self.foldername = self.rs.OPTIpath + str(sizeOfTarget) + "/"
        else:
            self.foldername = saveDir

        self.costStore = []
        self.trajTimeStore = []
        self.lastCoord = []

        self.sizeOfTarget = sizeOfTarget
        self.thetafile = thetafile
        self.actor = actor
        self.saveName = self.foldername + logDir + "/"
        self.nbReset = 0
        self.cost = 0
        self.max = 0
        self.progressTab = [0.1, 0.25, 0.5, 0.75, 1.]
        self.progress = 1
        self.reset()
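The estimator selection above is a four-way if/elif chain keyed on string tags. Where the set of estimators grows, a table-driven lookup keeps the tags and classes in one place. The following is a minimal sketch, assuming the same four StateEstimator* classes are importable in scope; the helper name makeEstimator is mine, not the project's:

# Minimal sketch of a table-driven alternative to the if/elif chain above
# (assumes the same estimator classes are in scope).
ESTIMATORS = {
    "Inv": StateEstimator,
    "Reg": StateEstimatorRegression,
    "Hyb": StateEstimatorHyb,
    "No": StateEstimatorNoFeedBack,
}

def makeEstimator(estim, rs, arm):
    try:
        cls = ESTIMATORS[estim]
    except KeyError:
        raise TypeError("Unknown estimator: " + estim)
    return cls(rs.inputDim, rs.outputDim, rs.delayUKF, arm)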
Example #2
 def __init__(self, rs, sizeOfTarget, thetafile, arm="Arm26", estim="Inv", actor=None, logDir="Best", saveDir=None):
     self.rs=rs
     self.posIni = np.loadtxt(pathDataFolder + rs.experimentFilePosIni)
     self.arm=ArmType[arm]()
     self.arm.setDT(rs.dt)
     if(not self.rs.det):
         self.arm.setNoise(rs.noise)
     if(rs.costClass is None):
         self.trajCost=CostDDPG(rs)
     else:
         self.trajCost=rs.costClass(rs)
     if(len(self.posIni.shape)==1):
         self.posIni=self.posIni.reshape((1,self.posIni.shape[0]))
     if estim=="Inv" :
         self.stateEstimator = StateEstimator(rs.inputDim,rs.outputDim, rs.delayUKF, self.arm)
     elif estim=="Reg":
         self.stateEstimator = StateEstimatorRegression(rs.inputDim,rs.outputDim, rs.delayUKF, self.arm)
     elif estim == "Hyb" :
         self.stateEstimator = StateEstimatorHyb(rs.inputDim,rs.outputDim, rs.delayUKF, self.arm)
     elif estim =="No" :
         self.stateEstimator = StateEstimatorNoFeedBack(rs.inputDim,rs.outputDim, rs.delayUKF, self.arm)
     else :
         raise TypeError("Unknown estimator: " + estim)
     
     if(saveDir is None):
         self.foldername=self.rs.OPTIpath + str(sizeOfTarget) + "/"
     else :
         self.foldername=saveDir
         
     self.costStore = []
     self.trajTimeStore = []
     self.lastCoord = []
     
     self.sizeOfTarget = sizeOfTarget
     self.thetafile = thetafile
     self.actor=actor
     self.saveName = self.foldername + logDir + "/"
     self.nbReset=0
     self.cost=0
     self.max=0
     self.progressTab=[0.1,0.25,0.5,0.75,1.]
     self.progress=1
     self.reset()
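One detail worth noting in both versions of this constructor: np.loadtxt returns a 1-D array when the start-position file contains a single row, which is why the reshape guard restores a consistent 2-D shape. A standalone demonstration, with an inline two-value string standing in for the real positions file:

import io
import numpy as np

one_row = np.loadtxt(io.StringIO(u"0.1 0.4"))
print(one_row.shape)                              # (2,) for a single-row file
if len(one_row.shape) == 1:
    one_row = one_row.reshape((1, one_row.shape[0]))
print(one_row.shape)                              # (1, 2)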
Example #3
import numpy as np
import random as rd

# The remaining names used below (Env, ArmType, CostDDPG, the StateEstimator*
# classes, pathDataFolder, createVector, muscleFilter, getNoisyCommand,
# writeArray, findDataFilename) come from project-local modules whose import
# paths depend on the repository layout.

class DDPGEnv(Env):

    print_interval = 20

    def __init__(self,
                 rs,
                 sizeOfTarget,
                 thetafile,
                 arm="Arm26",
                 estim="Inv",
                 actor=None,
                 logDir="Best",
                 saveDir=None):
        self.rs = rs
        self.posIni = np.loadtxt(pathDataFolder + rs.experimentFilePosIni)
        self.arm = ArmType[arm]()
        self.arm.setDT(rs.dt)
        if (not self.rs.det):
            self.arm.setNoise(rs.noise)
        if (rs.costClass is None):
            self.trajCost = CostDDPG(rs)
        else:
            self.trajCost = rs.costClass(rs)
        if (len(self.posIni.shape) == 1):
            self.posIni = self.posIni.reshape((1, self.posIni.shape[0]))
        if estim == "Inv":
            self.stateEstimator = StateEstimator(rs.inputDim, rs.outputDim,
                                                 rs.delayUKF, self.arm)
        elif estim == "Reg":
            self.stateEstimator = StateEstimatorRegression(
                rs.inputDim, rs.outputDim, rs.delayUKF, self.arm)
        elif estim == "Hyb":
            self.stateEstimator = StateEstimatorHyb(rs.inputDim, rs.outputDim,
                                                    rs.delayUKF, self.arm)
        elif estim == "No":
            self.stateEstimator = StateEstimatorNoFeedBack(
                rs.inputDim, rs.outputDim, rs.delayUKF, self.arm)
        else:
            raise TypeError("Unknown estimator: " + estim)

        if (saveDir is None):
            self.foldername = self.rs.OPTIpath + str(sizeOfTarget) + "/"
        else:
            self.foldername = saveDir

        self.costStore = []
        self.trajTimeStore = []
        self.lastCoord = []

        self.sizeOfTarget = sizeOfTarget
        self.thetafile = thetafile
        self.actor = actor
        self.saveName = self.foldername + logDir + "/"
        self.nbReset = 0
        self.cost = 0
        self.max = 0
        self.progressTab = [0.1, 0.25, 0.5, 0.75, 1.]
        self.progress = 1
        self.reset()

    def getActionSize(self):
        return self.rs.outputDim

    def getStateSize(self):
        return self.rs.inputDim

    def getActionBounds(self):
        return [[1] * self.rs.outputDim, [0] * self.rs.outputDim]

    def saveCost(self):
        writeArray(self.costStore, self.foldername + "CostCMAES/", "traj",
                   ".cost")
        writeArray(self.trajTimeStore, self.foldername + "TrajTime/", "traj",
                   ".time")
        writeArray(self.lastCoord, self.foldername + "finalX/", "x", ".last")

    def act(self, action):
        action = np.array(action[0])
        action = action + np.random.normal(0., 0.2, action.shape)
        return self.__act__(action)

    def __act__(self, action):
        if self.rs.det:
            realU = muscleFilter(action)
            #computation of the arm state
            realNextState = self.arm.computeNextState(realU,
                                                      self.arm.getState())

            #computation of the approximated state
            tmpState = self.arm.getState()

            estimNextState = realNextState
        else:
            #realU = getNoisyCommand(U,self.arm.getMusclesParameters().getKnoiseU())
            realU = getNoisyCommand(action, self.arm.musclesP.knoiseU)
            realU = muscleFilter(realU)

            #computation of the arm state
            realNextState = self.arm.computeNextState(realU,
                                                      self.arm.getState())

            #computation of the approximated state
            tmpState = self.arm.getState()

            action = muscleFilter(action)
            estimNextState = self.stateEstimator.getEstimState(
                tmpState, action)

        #print estimNextState

        self.arm.setState(realNextState)

        #computation of the cost

        #get dotq and q from the state vector
        _, q = self.arm.getDotQAndQFromStateVector(tmpState)
        self.coordHand = self.arm.mgdEndEffector(q)
        costAct = self.trajCost.computeStateTransitionCost(
            realU, self.coordHand)
        #print ("dotq :",dotq)
        #computation of the coordinates to check whether the target is reached
        self.i += 1
        self.t += self.rs.dt
        self.estimState = estimNextState
        if (self.coordHand[1] >= self.rs.YTarget
                or self.i >= self.rs.maxSteps):
            costAct += self.trajCost.computeFinalReward(
                self.arm, self.t, self.coordHand, self.sizeOfTarget)
        self.cost += costAct
        return [realU], [costAct]

    def actAndStore(self, action):
        if self.rs.det:
            realU = muscleFilter(action)
            #computation of the arm state
            realNextState = self.arm.computeNextState(realU,
                                                      self.arm.getState())

            #computation of the approximated state
            tmpState = self.arm.getState()

            estimNextState = realNextState
        else:
            #realU = getNoisyCommand(U,self.arm.getMusclesParameters().getKnoiseU())
            realU = getNoisyCommand(action, self.arm.musclesP.knoiseU)
            realU = muscleFilter(realU)

            #computation of the arm state
            realNextState = self.arm.computeNextState(realU,
                                                      self.arm.getState())

            #computation of the approximated state
            tmpState = self.arm.getState()

            action = muscleFilter(action)
            estimNextState = self.stateEstimator.getEstimState(
                tmpState, action)

        #print estimNextState

        self.arm.setState(realNextState)

        #computation of the cost

        #get dotq and q from the state vector
        _, q = self.arm.getDotQAndQFromStateVector(tmpState)
        coordElbow, self.coordHand = self.arm.mgdFull(q)
        cost = self.trajCost.computeStateTransitionCost(realU, self.coordHand)

        stepStore = []
        stepStore.append(self.vectarget)
        stepStore.append(self.estimState)
        stepStore.append(tmpState)
        stepStore.append(realU)
        stepStore.append(action)
        stepStore.append(estimNextState)
        stepStore.append(realNextState)
        stepStore.append([coordElbow[0], coordElbow[1]])
        stepStore.append([self.coordHand[0], self.coordHand[1]])
        #print ("before",stepStore)
        # stepStore mixes sequences of different lengths, so build an object
        # array of sub-sequences, then flatten them into a single row.
        tmpstore = np.array(stepStore, dtype=object).flatten()
        row = [item for sub in tmpstore for item in sub]
        #print ("store",row)
        self.dataStore.append(row)

        self.i += 1
        self.t += self.rs.dt
        self.estimState = estimNextState
        return [realU], [cost]

    def state(self):
        return [self.arm.getState()]

    def reset(self, noise=True):
        #print("Episode : "+str(self.nbReset)+ ", Progression :"+str(self.progress))
        #self.nbReset+=1
        #print(self.cost)
        #Discrete beginning
        """
        i = rd.randint(0,len(self.posIni)-1)
        #i=0
        #computes the articular position q1, q2 from the initial coordinates (x, y)
        q1, q2 = self.arm.mgi(self.posIni[i][0], self.posIni[i][1])
        """
        #Progress
        """
        i = ((rd.random()*6*np.pi - 3*np.pi)*self.progressTab[self.progress]-6*np.pi)/12
        j= (rd.random()*0.3-0.15)*self.progressTab[self.progress]+0.25
        #i=0
        #computes the articular position q1, q2 from the initial coordinates (x, y)
        q1, q2 = self.arm.mgi(self.rs.XTarget+ j*np.cos(i), self.rs.YTarget+ j*np.sin(i))
        """
        """
        i =  - 6*np.pi/12
        j=rd.randint(1,4)/10.
        #j=0.1

        q1, q2 = self.arm.mgi(self.rs.XTarget+ j*np.cos(i), self.rs.YTarget+ j*np.sin(i))
        """

        x, y = self.begin()
        q1, q2 = self.arm.mgi(x, y)
        #creates the state vector [dotq1, dotq2, q1, q2]
        q = createVector(q1, q2)
        state = np.array([0., 0., q1, q2])
        #print("start state --------------: ",state)

        #computes the coordinates of the hand and the elbow from the position vector
        self.coordHand = self.arm.mgdEndEffector(q)
        #assert(coordHand[0]==x and coordHand[1]==y), "forward kinematics (MGD) error" -- disabled because of rounding effects

        #initializes parameters for the trajectory
        self.i, self.t, self.cost = 0, 0., 0.
        self.stateEstimator.initStore(state)
        self.arm.setState(state)
        self.estimState = state

    def begin(self):
        return self.beginDiscret()

    def beginOnePoint(self):
        return self.x, self.y

    def beginArcProgress(self):
        i = (rd.random() * 6 * np.pi - 9 * np.pi) / 12.
        j = rd.randint(1, self.progress) / 10.
        return self.rs.XTarget + j * np.cos(i), self.rs.YTarget + j * np.sin(i)

    def beginDiscret(self):
        i = rd.randint(0, len(self.posIni) - 1)
        return self.posIni[i][0], self.posIni[i][1]

    def isFinished(self):
        if (self.coordHand[1] >= self.rs.YTarget
                or self.i >= self.rs.maxSteps):
            return True
        return False

    def OneTraj(self, x, y):
        #computes the articular position q1, q2 from the initial coordinates (x, y)
        q1, q2 = self.arm.mgi(x, y)
        #creates the state vector [dotq1, dotq2, q1, q2]
        q = createVector(q1, q2)
        state = np.array([0., 0., q1, q2])
        #print("start state --------------: ",state)

        #computes the coordinates of the hand and the elbow from the position vector
        self.coordHand = self.arm.mgdEndEffector(q)
        #assert(coordHand[0]==x and coordHand[1]==y), "forward kinematics (MGD) error" -- disabled because of rounding effects

        #initializes parameters for the trajectory
        self.i, self.t, self.cost = 0, 0, 0
        self.stateEstimator.initStore(state)
        self.arm.setState(state)
        self.estimState = state

        self.vectarget = [0.0, 0.0, self.rs.XTarget, self.rs.YTarget]
        totalCost = 0
        while (not self.isFinished()):
            action = self.actor.action(self.estimState)
            _, cost = self.__act__(action)
            totalCost += cost[0]
        totalCost += self.trajCost.computeFinalReward(self.arm, self.t,
                                                      self.coordHand,
                                                      self.sizeOfTarget)
        return totalCost, self.t

    def saveOneTraj(self, x, y):
        #computes the articular position q1, q2 from the initial coordinates (x, y)
        q1, q2 = self.arm.mgi(x, y)
        #creates the state vector [dotq1, dotq2, q1, q2]
        q = createVector(q1, q2)
        state = np.array([0., 0., q1, q2])
        #print("start state --------------: ",state)

        #computes the coordinates of the hand and the elbow from the position vector
        self.coordHand = self.arm.mgdEndEffector(q)
        #assert(coordHand[0]==x and coordHand[1]==y), "forward kinematics (MGD) error" -- disabled because of rounding effects

        #initializes parameters for the trajectory
        self.i, self.t, self.cost = 0, 0, 0
        self.stateEstimator.initStore(state)
        self.arm.setState(state)
        self.estimState = state

        self.dataStore = []
        self.vectarget = [0.0, 0.0, self.rs.XTarget, self.rs.YTarget]
        totalCost = 0
        while (not self.isFinished()):
            action = self.actor.action(self.estimState)
            _, cost = self.actAndStore(action)
            totalCost += cost[0]
        totalCost += self.trajCost.computeFinalReward(self.arm, self.t,
                                                      self.coordHand,
                                                      self.sizeOfTarget)
        filename = findDataFilename(self.saveName + "Log/",
                                    "traj" + str(x) + "-" + str(y), ".log")
        np.savetxt(filename, self.dataStore)

        #used to ignore dispersion when the target line is not crossed
        if self.coordHand[1] >= self.rs.YTarget:
            self.lastCoord.append(self.coordHand[0])

        return totalCost, self.t

    def nTraj(self, point, repeat):
        x, y = point
        costAll, trajTimeAll = np.zeros(repeat), np.zeros(repeat)
        for i in range(repeat):
            costAll[i], trajTimeAll[i] = self.OneTraj(x, y)
        meanCost = np.mean(costAll)
        meanTrajTime = np.mean(trajTimeAll)
        return meanCost, meanTrajTime
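For orientation, a hypothetical usage sketch of DDPGEnv follows. The settings object and its attribute names (dt, noise, det, costClass, inputDim, outputDim, delayUKF, OPTIpath, XTarget, YTarget, maxSteps, experimentFilePosIni) are inferred from the class body above, and myActor stands in for any object exposing an action(state) method; none of these names are taken from the project itself:

# Hypothetical usage; `settings` and `myActor` are placeholders.
env = DDPGEnv(settings, sizeOfTarget=0.04, thetafile=None, actor=myActor)
cost, duration = env.saveOneTraj(0.0, 0.4)       # one logged trajectory from (x, y)
meanCost, meanTime = env.nTraj((0.0, 0.4), 20)   # average over 20 repeats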
Example #4
class DDPGEnv(Env):
    
    print_interval = 20
    
    def __init__(self, rs, sizeOfTarget, thetafile, arm="Arm26", estim="Inv", actor=None, logDir="Best", saveDir=None):
        self.rs=rs
        self.posIni = np.loadtxt(pathDataFolder + rs.experimentFilePosIni)
        self.arm=ArmType[arm]()
        self.arm.setDT(rs.dt)
        if(not self.rs.det):
            self.arm.setNoise(rs.noise)
        if(rs.costClass is None):
            self.trajCost=CostDDPG(rs)
        else:
            self.trajCost=rs.costClass(rs)
        if(len(self.posIni.shape)==1):
            self.posIni=self.posIni.reshape((1,self.posIni.shape[0]))
        if estim=="Inv" :
            self.stateEstimator = StateEstimator(rs.inputDim,rs.outputDim, rs.delayUKF, self.arm)
        elif estim=="Reg":
            self.stateEstimator = StateEstimatorRegression(rs.inputDim,rs.outputDim, rs.delayUKF, self.arm)
        elif estim == "Hyb" :
            self.stateEstimator = StateEstimatorHyb(rs.inputDim,rs.outputDim, rs.delayUKF, self.arm)
        elif estim =="No" :
            self.stateEstimator = StateEstimatorNoFeedBack(rs.inputDim,rs.outputDim, rs.delayUKF, self.arm)
        else :
            raise TypeError("Unknown estimator: " + estim)
        
        if(saveDir is None):
            self.foldername=self.rs.OPTIpath + str(sizeOfTarget) + "/"
        else :
            self.foldername=saveDir
            
        self.costStore = []
        self.trajTimeStore = []
        self.lastCoord = []
        
        self.sizeOfTarget = sizeOfTarget
        self.thetafile = thetafile
        self.actor=actor
        self.saveName = self.foldername + logDir + "/"
        self.nbReset=0
        self.cost=0
        self.max=0
        self.progressTab=[0.1,0.25,0.5,0.75,1.]
        self.progress=1
        self.reset()

        
    def getActionSize(self):
        return self.rs.outputDim
        
       
    def getStateSize(self):
        return self.rs.inputDim
        
        
    def getActionBounds(self):
        return [[1]*self.rs.outputDim, [0]*self.rs.outputDim]
    
    def saveCost(self):
        writeArray(self.costStore,self.foldername+"CostCMAES/","traj",".cost")
        writeArray(self.trajTimeStore, self.foldername+"TrajTime/","traj",".time")
        writeArray(self.lastCoord, self.foldername+"finalX/","x",".last")
        
    def act(self, action):
        action=np.array(action[0])
        action = action + np.random.normal(0.,0.2,action.shape)
        return self.__act__(action)
        
    
    def __act__(self, action):
        if self.rs.det:
            realU = muscleFilter(action)
            #computation of the arm state
            realNextState = self.arm.computeNextState(realU, self.arm.getState())
 
            #computation of the approximated state
            tmpState = self.arm.getState()
                
            estimNextState = realNextState
        else:
            #realU = getNoisyCommand(U,self.arm.getMusclesParameters().getKnoiseU())
            realU = getNoisyCommand(action,self.arm.musclesP.knoiseU)
            realU = muscleFilter(realU)


            #computation of the arm state
            realNextState = self.arm.computeNextState(realU, self.arm.getState())
     
            #computation of the approximated state
            tmpState = self.arm.getState()
                
            action = muscleFilter(action)
            estimNextState = self.stateEstimator.getEstimState(tmpState,action)


            
        #print estimNextState

        self.arm.setState(realNextState)

        #computation of the cost

        #get dotq and q from the state vector
        _, q = self.arm.getDotQAndQFromStateVector(tmpState)
        self.coordHand = self.arm.mgdEndEffector(q)
        costAct = self.trajCost.computeStateTransitionCost(realU, self.coordHand)
        #print ("dotq :",dotq)
        #computation of the coordinates to check whether the target is reached
        self.i+=1
        self.t += self.rs.dt
        self.estimState = estimNextState
        if(self.coordHand[1] >= self.rs.YTarget or self.i >= self.rs.maxSteps):
            costAct += self.trajCost.computeFinalReward(self.arm,self.t, self.coordHand, self.sizeOfTarget)
        self.cost+=costAct
        return [realU], [costAct]
    
    
    
    def actAndStore(self, action):
        if self.rs.det:
            realU = muscleFilter(action)
            #computation of the arm state
            realNextState = self.arm.computeNextState(realU, self.arm.getState())
 
            #computation of the approximated state
            tmpState = self.arm.getState()
                
            estimNextState = realNextState
        else:
            #realU = getNoisyCommand(U,self.arm.getMusclesParameters().getKnoiseU())
            realU = getNoisyCommand(action,self.arm.musclesP.knoiseU)
            realU = muscleFilter(realU)


            #computation of the arm state
            realNextState = self.arm.computeNextState(realU, self.arm.getState())
     
            #computation of the approximated state
            tmpState = self.arm.getState()
                
            action = muscleFilter(action)
            estimNextState = self.stateEstimator.getEstimState(tmpState,action)


            
        #print estimNextState

        self.arm.setState(realNextState)

        #computation of the cost

        #get dotq and q from the state vector
        _, q = self.arm.getDotQAndQFromStateVector(tmpState)
        coordElbow, self.coordHand = self.arm.mgdFull(q)
        cost = self.trajCost.computeStateTransitionCost(realU, self.coordHand)
        
        stepStore=[]
        stepStore.append(self.vectarget)
        stepStore.append(self.estimState)
        stepStore.append(tmpState)
        stepStore.append(realU)
        stepStore.append(action)
        stepStore.append(estimNextState)
        stepStore.append(realNextState)
        stepStore.append([coordElbow[0], coordElbow[1]])
        stepStore.append([self.coordHand[0], self.coordHand[1]])
        #print ("before",stepStore)
        # stepStore mixes sequences of different lengths, so build an object
        # array of sub-sequences, then flatten them into a single row.
        tmpstore = np.array(stepStore, dtype=object).flatten()
        row = [item for sub in tmpstore for item in sub]
        #print ("store",row)
        self.dataStore.append(row)
        
        self.i+=1
        self.t += self.rs.dt
        self.estimState = estimNextState
        return [realU], [cost]
    
    def state(self):
        return [self.arm.getState()]
    
    def reset(self, noise=True):
        #print("Episode : "+str(self.nbReset)+ ", Progression :"+str(self.progress))
        #self.nbReset+=1
        #print(self.cost)
        #Discrete beginning
        """
        i = rd.randint(0,len(self.posIni)-1)
        #i=0
        #computes the articular position q1, q2 from the initial coordinates (x, y)
        q1, q2 = self.arm.mgi(self.posIni[i][0], self.posIni[i][1])
        """
        #Progress
        """
        i = ((rd.random()*6*np.pi - 3*np.pi)*self.progressTab[self.progress]-6*np.pi)/12
        j= (rd.random()*0.3-0.15)*self.progressTab[self.progress]+0.25
        #i=0
        #computes the articular position q1, q2 from the initial coordinates (x, y)
        q1, q2 = self.arm.mgi(self.rs.XTarget+ j*np.cos(i), self.rs.YTarget+ j*np.sin(i))
        """


        


        """
        i =  - 6*np.pi/12
        j=rd.randint(1,4)/10.
        #j=0.1

        q1, q2 = self.arm.mgi(self.rs.XTarget+ j*np.cos(i), self.rs.YTarget+ j*np.sin(i))
        """

        x, y = self.begin()
        q1, q2 = self.arm.mgi(x,y)
        #creates the state vector [dotq1, dotq2, q1, q2]
        q = createVector(q1,q2)
        state = np.array([0., 0., q1, q2])
        #print("start state --------------: ",state)

        #computes the coordinates of the hand and the elbow from the position vector
        self.coordHand = self.arm.mgdEndEffector(q)
        #assert(coordHand[0]==x and coordHand[1]==y), "forward kinematics (MGD) error" -- disabled because of rounding effects

        #initializes parameters for the trajectory
        self.i, self.t, self.cost = 0, 0., 0.
        self.stateEstimator.initStore(state)
        self.arm.setState(state)
        self.estimState = state

    def begin(self):
        return self.beginDiscret()

    def beginOnePoint(self):
        return self.x, self.y
        
    def beginArcProgress(self):
        i = (rd.random()*6*np.pi - 9*np.pi)/12.
        j=rd.randint(1,self.progress)/10.
        return self.rs.XTarget+ j*np.cos(i), self.rs.YTarget+ j*np.sin(i)
    
    def beginDiscret(self):
        i = rd.randint(0,len(self.posIni)-1)
        return self.posIni[i][0], self.posIni[i][1]

    def isFinished(self):
        if(self.coordHand[1] >= self.rs.YTarget or self.i >= self.rs.maxSteps):
            return True
        return False
    
    def OneTraj(self, x, y):
        #computes the articular position q1, q2 from the initial coordinates (x, y)
        q1, q2 = self.arm.mgi(x, y)
        #creates the state vector [dotq1, dotq2, q1, q2]
        q = createVector(q1,q2)
        state = np.array([0., 0., q1, q2])
        #print("start state --------------: ",state)

        #computes the coordinates of the hand and the elbow from the position vector
        self.coordHand = self.arm.mgdEndEffector(q)
        #assert(coordHand[0]==x and coordHand[1]==y), "forward kinematics (MGD) error" -- disabled because of rounding effects

        #initializes parameters for the trajectory
        self.i, self.t, self.cost = 0, 0, 0
        self.stateEstimator.initStore(state)
        self.arm.setState(state)
        self.estimState = state
        
        self.vectarget=[0.0, 0.0, self.rs.XTarget, self.rs.YTarget]
        totalCost=0
        while(not self.isFinished()):
            action=self.actor.action(self.estimState)
            _,cost = self.__act__(action)
            totalCost+= cost[0]
        totalCost += self.trajCost.computeFinalReward(self.arm,self.t,self.coordHand,self.sizeOfTarget)
        return totalCost, self.t
    
    def saveOneTraj(self, x, y):
        #computes the articular position q1, q2 from the initial coordinates (x, y)
        q1, q2 = self.arm.mgi(x, y)
        #creates the state vector [dotq1, dotq2, q1, q2]
        q = createVector(q1,q2)
        state = np.array([0., 0., q1, q2])
        #print("start state --------------: ",state)

        #computes the coordinates of the hand and the elbow from the position vector
        self.coordHand = self.arm.mgdEndEffector(q)
        #assert(coordHand[0]==x and coordHand[1]==y), "forward kinematics (MGD) error" -- disabled because of rounding effects

        #initializes parameters for the trajectory
        self.i, self.t, self.cost = 0, 0, 0
        self.stateEstimator.initStore(state)
        self.arm.setState(state)
        self.estimState = state
        
        self.dataStore=[]
        self.vectarget=[0.0, 0.0, self.rs.XTarget, self.rs.YTarget]
        totalCost=0
        while(not self.isFinished()):
            action=self.actor.action(self.estimState)
            _,cost = self.actAndStore(action)
            totalCost+= cost[0]
        totalCost += self.trajCost.computeFinalReward(self.arm,self.t,self.coordHand,self.sizeOfTarget)
        filename = findDataFilename(self.saveName+"Log/","traj"+str(x)+"-"+str(y),".log")
        np.savetxt(filename,self.dataStore)
        
        #used to ignore dispersion when the target line is not crossed
        if self.coordHand[1] >= self.rs.YTarget:
            self.lastCoord.append(self.coordHand[0])
            
        return totalCost, self.t
    
    def nTraj(self, point, repeat):
        x, y = point
        costAll, trajTimeAll = np.zeros(repeat), np.zeros(repeat)
        for i in range(repeat):
            costAll[i], trajTimeAll[i]  = self.OneTraj(x, y) 
        meanCost = np.mean(costAll)
        meanTrajTime = np.mean(trajTimeAll)
        return meanCost, meanTrajTime
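A note on the logging path in actAndStore: stepStore mixes sequences of different lengths (4-element target and state vectors, a muscle activation vector, 2-element coordinate pairs), so the flatten happens in two stages. Here is a minimal standalone sketch of that flattening, with placeholder values of my own invention:

import numpy as np

# Placeholder per-step record with mixed lengths, mirroring stepStore above.
step_record = [
    [0.0, 0.0, 0.5, 0.6],            # vectarget: [0, 0, XTarget, YTarget]
    [0.0, 0.0, 1.0, 2.0],            # a state vector [dotq1, dotq2, q1, q2]
    [0.1, 0.2, 0.3, 0.4, 0.5, 0.6],  # a muscle activation vector
    [0.12, 0.34],                    # a coordinate pair
]
# dtype=object keeps the ragged structure; flatten() yields the
# sub-sequences, and the comprehension produces one flat row per step.
tmp = np.array(step_record, dtype=object).flatten()
row = [item for sub in tmp for item in sub]
print(row)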