def makeState(self):
     """Build the observation vector selected by ``self.stateVersion``.

     Layout per supported version (``N`` is ``len(self.scenario.listOfArrival)``):

     * 1: ``nbSlicesAddSinceLastReconf``, ``nbSlicesRejectSinceLastReconf``,
       ``nbSlicesRemoveSinceLastReconf`` and ``nbMinutesSinceLastReconf`` as
       raw counters, then ``Util.bandwithUsed(...)`` divided by
       ``topology.linksCapacity`` and ``timeStep / N``.
     * 2: disabled -- prints a message and terminates the process.
     * 3: ``nbMinutesSinceLastReconf / N`` and ``timeStep / N``.
     * 4: ``nbSlicesAddSinceLastReconf / N``,
       ``nbSlicesRemoveSinceLastReconf / N``,
       ``nbMinutesSinceLastReconf / N`` and ``timeStep / N``.
     * 5: the version 4 values followed by the sum of
       ``self.functions[f][1]`` over every function that
       ``Util.utilisationAndVnfUsed`` reports on a used node (this code
       names that sum a VNF cost).
     * 6: the version 4 values without the ``timeStep / N`` entry.

     Returns:
         list: the state values in the order listed above.

     Raises:
         ValueError: if ``stateVersion`` is not one of the versions above
             (instead of silently returning ``None``).
         ZeroDivisionError: if ``scenario.listOfArrival`` is empty.
     """
     version = self.stateVersion

     if version == 1:
         bwUsed = Util.bandwithUsed(self.allocateur.listSlicesCurrentlyAllocated, self.allocateur.currentAlloc)
         return [
             self.nbSlicesAddSinceLastReconf,
             self.nbSlicesRejectSinceLastReconf,
             self.nbSlicesRemoveSinceLastReconf,
             self.nbMinutesSinceLastReconf,
             float(bwUsed)/self.topology.linksCapacity,
             float(self.timeStep)/len(self.scenario.listOfArrival),
         ]

     if version == 2:
         # This version is deliberately switched off: report it and stop the run.
         print("Version make state NOOOO")
         exit()

     if version not in (3, 4, 5, 6):
         raise ValueError("Unsupported stateVersion: {!r}".format(version))

     # Versions 3 to 6 all normalise by the number of entries in listOfArrival.
     nbArrivals = len(self.scenario.listOfArrival)
     minutesRatio = float(self.nbMinutesSinceLastReconf)/nbArrivals

     if version == 3:
         return [minutesRatio, float(self.timeStep)/nbArrivals]

     state = [
         float(self.nbSlicesAddSinceLastReconf)/nbArrivals,
         float(self.nbSlicesRemoveSinceLastReconf)/nbArrivals,
         minutesRatio,
     ]
     if version == 6:
         return state

     state.append(float(self.timeStep)/nbArrivals)
     if version == 5:
         linksUsage, nodesUsage, nodeFunction = Util.utilisationAndVnfUsed(self.functions, self.allocateur.listSlicesCurrentlyAllocated, self.allocateur.currentAlloc, roundNumber = 8)
         newCostVnfsUsed = 0
         for u in nodesUsage:
             for f in nodeFunction[u]:
                 newCostVnfsUsed += self.functions[f][1]
         state.append(newCostVnfsUsed)
     return state
    def _step(self, action):
        """Apply ``action`` for one decision step and return the resulting time step.

        If the previous call ended the episode, the environment is reset and
        the result of ``self._reset()`` is returned.  Otherwise the method:

        1. pulls up to ``numberOfStepsByState`` batches of arriving slices
           from ``scenario.getNewSlices()`` (``None`` marks the end of the
           scenario) and peeks at the following batches of
           ``scenario.listOfArrival`` -- without advancing the scenario --
           up to ``numberOfStepsForCost`` batches in total;
        2. builds the "no reconfiguration" baseline with ``realAlloc`` /
           ``fakeAlloc`` (what those do to the allocator is not visible
           here);
        3. calls ``allocateur.update`` for the current time step, with
           ``doIReconfigureNow`` raised around the call when ``action == 1``;
        4. plays the pulled batches: adds each slice, updates the
           add / reject / remove / minute counters and records profit and
           VNF cost;
        5. computes the reward with ``makeReward`` and refreshes ``_state``.

        Args:
            action: array-like holding one value; ``1`` requests a
                reconfiguration.  It must support ``.item(0)`` and
                ``== 1``.

        Returns:
            ``ts.termination(...)`` when the scenario ran out of arrivals
            during this step, otherwise ``ts.transition(...)`` discounted by
            ``gammaDiscount``.  The observation is
            ``np.array([self._state], dtype=np.float32)``.
        """
        if self._episode_ended:
            return self._reset()
        self.reconfsDone.append(action.item(0))

        # ------------------------------------------------------------------
        # Collect the arrivals played during this step and the look-ahead ones
        # ------------------------------------------------------------------
        listListSlicesState = []      # batches consumed from the scenario
        listListSlicesFakeAlloc = []  # consumed batches followed by look-ahead batches
        listListSlicesCost = []       # look-ahead batches only
        for _ in range(self.numberOfStepsByState):
            listSlices = self.scenario.getNewSlices()
            # A None batch is stored too: it is the end-of-scenario marker.
            listListSlicesState.append(listSlices)
            listListSlicesFakeAlloc.append(listSlices)
            if listSlices is None:
                break

        # Peek at the next batches through a local cursor so the scenario's
        # own iterator is left untouched.
        iteratorTmp = self.scenario.iteratorArrival
        for _ in range(self.numberOfStepsByState, self.numberOfStepsForCost):
            if iteratorTmp == self.scenario.nbTimeStep:
                listListSlicesCost.append(None)
                listListSlicesFakeAlloc.append(None)
                break
            upcomingSlices = self.scenario.listOfArrival[iteratorTmp]
            listListSlicesCost.append(upcomingSlices)
            listListSlicesFakeAlloc.append(upcomingSlices)
            iteratorTmp += 1

        listProfit = []
        listCostVnfUsed = []
        listProfitNoReconf = []
        listCostVnfNoReconf = []

        def recordProfitAndCost():
            # The allocator's histories are indexed from param.startDynamic.
            index = int(self.timeStep - param.startDynamic)
            listProfit.append(self.allocateur.profit[index])
            listCostVnfUsed.append(self.allocateur.CostVnfsUsed[index])

        if self.rewardVersion == 4:
            listCostVnfNoReconf, _, listProfitNoReconf = self.realAlloc(self.allocateurNoReconf, listListSlicesState)

        # ------------------------------------------------------------------
        # Update the allocation, reconfiguring first when asked to
        # ------------------------------------------------------------------
        reconfigure = bool(action == 1)
        if reconfigure:
            if self.rewardVersion == 4:
                # Extend the baseline already built above with the look-ahead batches.
                listCostVnfNoReconfTmp, _, listProfitNoReconfTmp = self.fakeAlloc(self.allocateurNoReconf, listListSlicesCost)
                for profit, cost in zip(listProfitNoReconfTmp, listCostVnfNoReconfTmp):
                    listProfitNoReconf.append(profit)
                    listCostVnfNoReconf.append(cost)
            else:
                listCostVnfNoReconf, _, listProfitNoReconf = self.fakeAlloc(self.allocateur, listListSlicesFakeAlloc)

            if self.evaluation:
                CostVnfsUsed, costByMb = self._recordEvaluationBeforeReconf()

            # The flag is raised only for the duration of this update;
            # presumably the allocator reads it -- not visible here.
            self.doIReconfigureNow = True
            self.allocateur.update(self.timeStep, remove = False)
            self.doIReconfigureNow = False
            recordProfitAndCost()

            if self.evaluation:
                newBwAllocated, newCostVnfsUsed = self._allocatedBandwidthAndVnfCost()
                # NOTE(review): these divisions raise ZeroDivisionError when
                # nothing is allocated or no VNF cost was measured.
                newCostByMb = newCostVnfsUsed/float(newBwAllocated)
                self.listImprovVnf.append((CostVnfsUsed - newCostVnfsUsed)/float(CostVnfsUsed)*100)
                self.listImprovCostMb.append((costByMb - newCostByMb)/float(costByMb)*100)
        else:
            self.allocateur.update(self.timeStep, remove = False)
            recordProfitAndCost()

        # ------------------------------------------------------------------
        # Add the new slices before the next decision
        # ------------------------------------------------------------------
        for i in range(self.numberOfStepsByState):
            self.timeStep += 1

            listSlices = listListSlicesState[i]
            if listSlices is None:
                # The scenario is exhausted: close the episode.
                reward = self.makeReward(listProfit, listProfitNoReconf, listCostVnfUsed, listCostVnfNoReconf, action)
                if self.evaluation and reconfigure:
                    self.listReward.append(reward)
                self.rewardTotal += reward

                self._episode_ended = True

                self.listInstanceAlreadyTrained.append(self.listInstanceFiles[0])
                if self.evaluation:
                    self.saveEvaluation()
                del self.listInstanceFiles[0]
                print("Number reconf done {}".format(sum(self.reconfsDone)))
                print("Reward : {}".format(self.rewardTotal))

                self._state = self.makeState()
                return ts.termination(np.array([self._state], dtype=np.float32), reward=reward)

            for s in listSlices:
                self.allocateur.addSlice(s, self.timeStep)
                # A slice counts as accepted when it is the last entry of the
                # accepted list; an empty list therefore means a rejection.
                accepted = self.allocateur.listSlicesAccepted
                if accepted and accepted[-1].id == s.id:
                    self.nbSlicesAddSinceLastReconf += 1
                else:
                    self.nbSlicesRejectSinceLastReconf += 1

            self.nbMinutesSinceLastReconf += 1

            nbSlices = len(self.allocateur.listSlicesCurrentlyAllocated)
            # The update of the last sub-step is left to the next call of
            # _step; only the slice removal is done here.
            if i < self.numberOfStepsByState-1:
                self.allocateur.update(self.timeStep)
                recordProfitAndCost()
            else:
                self.allocateur.removeSlices(self.timeStep)
            self.nbSlicesRemoveSinceLastReconf += nbSlices-len(self.allocateur.listSlicesCurrentlyAllocated)

        # ------------------------------------------------------------------
        # Reward
        # ------------------------------------------------------------------
        listCostVnfAdditionnal, listProfitAdditionnal = listCostVnfUsed, listProfit
        if reconfigure:
            # Compare over numberOfStepsForCost steps rather than only
            # numberOfStepsByState: the look-ahead results come first, then
            # the values measured during this step.
            listCostVnfAdditionnal, _, listProfitAdditionnal = self.fakeAlloc(self.allocateur, listListSlicesCost)
            listProfitAdditionnal.extend(listProfit)
            listCostVnfAdditionnal.extend(listCostVnfUsed)
        reward = self.makeReward(listProfitAdditionnal, listProfitNoReconf, listCostVnfAdditionnal, listCostVnfNoReconf, action)

        if self.evaluation and reconfigure:
            self.listReward.append(reward)

        self.rewardTotal += reward

        self._state = self.makeState()

        return ts.transition(np.array([self._state], dtype=np.float32), reward=reward, discount=self.gammaDiscount)

    def _allocatedBandwidthAndVnfCost(self):
        """Return ``(bandwidth, vnfCost)`` for the slices currently allocated.

        ``bandwidth`` is the sum of ``s.bd`` over
        ``allocateur.listSlicesCurrentlyAllocated``.  ``vnfCost`` is the sum
        of ``self.functions[f][1]`` over every function listed for a used
        node by ``Util.utilisationAndVnfUsed``.
        """
        bwAllocated = sum(s.bd for s in self.allocateur.listSlicesCurrentlyAllocated)
        linksUsage, nodesUsage, nodeFunction = Util.utilisationAndVnfUsed(self.functions, self.allocateur.listSlicesCurrentlyAllocated, self.allocateur.currentAlloc, roundNumber = 8)
        costVnfsUsed = 0
        for u in nodesUsage:
            for f in nodeFunction[u]:
                costVnfsUsed += self.functions[f][1]
        return bwAllocated, costVnfsUsed

    def _recordEvaluationBeforeReconf(self):
        """Append the pre-reconfiguration evaluation metrics to their histories.

        Returns:
            tuple: ``(vnfCost, costByMb)`` measured before the
            reconfiguration, where ``costByMb`` is ``vnfCost`` divided by the
            allocated bandwidth.
        """
        self.listTimeStepReconf.append(self.timeStep)

        # Presumably a mapping from a starting time step to a period label
        # (it is both iterated and indexed by the same value) -- confirm in param.
        periodeList = param.timePeriodeDynamic
        periode = "D3"
        for i in periodeList:
            if self.timeStep >= i:
                periode = periodeList[i]
        self.listPeriodeReconf.append(periode)

        self.listNbSlicesAddSinceLastReconf.append(self.nbSlicesAddSinceLastReconf)
        self.listNbSlicesRemoveSinceLastReconf.append(self.nbSlicesRemoveSinceLastReconf)
        self.listNbMinutesSinceLastReconf.append(self.nbMinutesSinceLastReconf)

        bwUsed, nbVnfUsed = Util.bandwithAndVnfUsed(self.allocateur.listSlicesCurrentlyAllocated, self.allocateur.currentAlloc)
        self.listPercBandwidthUsed.append(float(bwUsed)/self.topology.linksCapacity*100)
        self.listPercVnfUsed.append(float(nbVnfUsed)/self.nbVnf*100)

        maxBwServed = 0
        if len(self.topology.listBaseStation) > 0:
            for (u,v) in self.topology.links:
                if u in self.topology.listBaseStation:
                    # NOTE(review): indexing links with 0 while iterating
                    # (u, v) pairs looks unintended (links[(u, v)]?) -- left
                    # unchanged until the structure of topology.links is confirmed.
                    maxBwServed += self.topology.links[0]
        else:
            maxBwServed = 1

        # NOTE(review): maxBwServed stays 0 when no link starts at a base
        # station, which makes the division below fail.
        bwAllocated = sum(s.bd for s in self.allocateur.listSlicesCurrentlyAllocated)
        self.listPercBandwidthAllocated.append(float(bwAllocated)/maxBwServed*100)
        self.listNbSlicesAllocated.append(len(self.allocateur.listSlicesCurrentlyAllocated))

        bwAllocated, costVnfsUsed = self._allocatedBandwidthAndVnfCost()
        # NOTE(review): raises ZeroDivisionError when no bandwidth is allocated.
        return costVnfsUsed, costVnfsUsed/float(bwAllocated)