def env_init(self):
    """
    Initialise the Levin dispersion model and build the RL-Glue task spec.

    Sets ``self.dispersionModel`` to ``InvasiveUtility.Levin`` and, for that
    model, stores the path parameters scaled by ``1 / C`` in
    ``self.dispertionTable`` and a ``GerminationDispersionParameterClass(1, 1)``
    in ``self.germinationObj``.

    The REWARDS range of the task spec lists ``self.Bad_Action_Penalty`` and
    the negated cost of an all-ones state combined with an all-threes action.

    :return: (str) RL-Glue 3.0 task specification
    """
    simParams = self.simulationParameterObj
    actParams = self.actionParameterObj

    self.dispersionModel = InvasiveUtility.Levin
    notDirectedG = networkx.Graph(simParams.graph)
    adjMatrix = adjacency_matrix(notDirectedG)
    edges = simParams.graph.edges()

    if self.dispersionModel == InvasiveUtility.Levin:
        upRate = simParams.upStreamRate
        downRate = simParams.downStreamRate
        parameters = InvasiveUtility.calculatePath(notDirectedG, adjMatrix, edges, downRate, upRate)
        # Scaling constant; its denominator is zero when upRate == 0.5 or
        # downRate == 1.
        C = (1 - upRate * downRate) / ((1 - 2 * upRate) * (1 - downRate))
        self.dispertionTable = np.dot(1 / C, parameters)
        self.germinationObj = GerminationDispersionParameterClass(1, 1)

    # Worst case used for the reward bound: every slot of every reach set to 1
    # (described by the original author as the fully invaded river).
    worst_case = repmat(1, 1, simParams.nbrReaches * simParams.habitatSize)[0]
    cost_state_unit = InvasiveUtility.get_unit_invaded_reaches(
        worst_case, simParams.habitatSize) * actParams.costPerReach
    stateCost = cost_state_unit + InvasiveUtility.get_invaded_reaches(worst_case) * actParams.costPerTree
    stateCost = stateCost + InvasiveUtility.get_empty_slots(worst_case) * actParams.emptyCost
    # Action vector holding the value 3 for every reach, costed on the worst case.
    all_threes = repmat(3, 1, simParams.nbrReaches)[0]
    costAction = InvasiveUtility.get_budget_cost_actions(all_threes, worst_case, actParams)

    return "".join([
        "VERSION RL-Glue-3.0 PROBLEMTYPE non-episodic DISCOUNTFACTOR ",
        str(self.discountFactor),
        " OBSERVATIONS INTS (",
        str(simParams.nbrReaches * simParams.habitatSize),
        " 1 3) ACTIONS INTS (",
        str(simParams.nbrReaches),
        " 1 4) REWARDS (",
        str(self.Bad_Action_Penalty),
        " ",
        str(-1 * (costAction + stateCost)),
        ") EXTRA ",
        str(simParams.graph.edges()),
        " BUDGET ",
        str(actParams.budget),
        " by Majid Taleghan.",
    ])
Ejemplo n.º 2
0
    def env_init(self):
        """
        Initialise the Levin dispersion model and build the RL-Glue task spec.

        Sets ``self.dispersionModel`` to ``InvasiveUtility.Levin`` and, for
        that model, stores the path parameters scaled by ``1 / C`` in
        ``self.dispertionTable`` and a
        ``GerminationDispersionParameterClass(1, 1)`` in ``self.germinationObj``.

        The REWARDS range of the task spec lists ``self.Bad_Action_Penalty``
        and the negated cost of an all-ones state combined with an all-threes
        action.

        :return: (str) RL-Glue 3.0 task specification
        """
        simParams = self.simulationParameterObj
        actParams = self.actionParameterObj

        self.dispersionModel = InvasiveUtility.Levin
        notDirectedG = networkx.Graph(simParams.graph)
        adjMatrix = adjacency_matrix(notDirectedG)
        edges = simParams.graph.edges()

        if self.dispersionModel == InvasiveUtility.Levin:
            upRate = simParams.upStreamRate
            downRate = simParams.downStreamRate
            parameters = InvasiveUtility.calculatePath(notDirectedG, adjMatrix, edges, downRate, upRate)
            # Scaling constant; its denominator is zero when upRate == 0.5 or
            # downRate == 1.
            C = (1 - upRate * downRate) / ((1 - 2 * upRate) * (1 - downRate))
            self.dispertionTable = np.dot(1 / C, parameters)
            self.germinationObj = GerminationDispersionParameterClass(1, 1)

        # Worst case used for the reward bound: every slot of every reach set
        # to 1 (described by the original author as the fully invaded river).
        worst_case = repmat(1, 1, simParams.nbrReaches * simParams.habitatSize)[0]
        cost_state_unit = InvasiveUtility.get_unit_invaded_reaches(
            worst_case, simParams.habitatSize) * actParams.costPerReach
        stateCost = cost_state_unit + InvasiveUtility.get_invaded_reaches(worst_case) * actParams.costPerTree
        stateCost = stateCost + InvasiveUtility.get_empty_slots(worst_case) * actParams.emptyCost
        # Action vector holding the value 3 for every reach, costed on the
        # worst case.
        all_threes = repmat(3, 1, simParams.nbrReaches)[0]
        costAction = InvasiveUtility.get_budget_cost_actions(all_threes, worst_case, actParams)

        return "".join([
            "VERSION RL-Glue-3.0 PROBLEMTYPE non-episodic DISCOUNTFACTOR ",
            str(self.discountFactor),
            " OBSERVATIONS INTS (",
            str(simParams.nbrReaches * simParams.habitatSize),
            " 1 3) ACTIONS INTS (",
            str(simParams.nbrReaches),
            " 1 4) REWARDS (",
            str(self.Bad_Action_Penalty),
            " ",
            str(-1 * (costAction + stateCost)),
            ") EXTRA ",
            str(simParams.graph.edges()),
            " BUDGET ",
            str(actParams.budget),
            " by Majid Taleghan.",
        ])
Ejemplo n.º 3
0
    def random_player(self, state):
        """
        Return the allowed action found at a randomly drawn index for ``state``.

        The list produced by ``InvasiveUtility.getActions`` is stored in
        ``self.all_allowed_actions`` under the state's id on every call,
        replacing any list stored there before.

        :param state: state handed to ``SamplingUtility.getStateId`` and
            ``InvasiveUtility.getActions``
        :return: the entry of the allowed-action list at the drawn index
        """
        key = SamplingUtility.getStateId(state)
        self.all_allowed_actions[key] = InvasiveUtility.getActions(state, self.nbrReaches, self.habitatSize)
        candidates = self.all_allowed_actions[key]
        # Upper bound is len - 1, matching an inclusive randint (as in random.Random).
        pick = self.randGenerator.randint(0, len(candidates) - 1)
        return candidates[pick]
    def env_step(self, action):
        """
        Apply ``action`` to the current state and return reward and observation.

        An action that is not allowable in the current state, or whose cost
        exceeds ``self.actionParameterObj.budget``, leaves ``self.state``
        unchanged and yields ``self.Bad_Action_Penalty`` with the observation
        ``[-1]``.  Otherwise the next state is simulated and stored, and the
        reward is ``-(action cost + state cost)``, both costs being computed
        on the state before the transition.

        :param action: RL-Glue action whose ``intArray`` holds one integer per reach
        :return: ``Reward_observation_terminal`` with ``r`` and ``o`` set
        :raises AssertionError: if the action length differs from the number of reaches
        """
        action = action.intArray
        nbrReaches = self.simulationParameterObj.nbrReaches

        # Debug aid: show the malformed action right before the assertion fires.
        if len(action) != nbrReaches:
            print("%s %s" % (action, len(action)))

        assert len(action) == nbrReaches, "Expected " + str(nbrReaches) + " integer action."

        def rejected():
            # Result returned for an action that cannot be executed.
            theObs = Observation()
            theObs.intArray = [-1]
            returnRO = Reward_observation_terminal()
            returnRO.r = self.Bad_Action_Penalty
            returnRO.o = theObs
            return returnRO

        if not InvasiveUtility.is_action_allowable(action, self.state):
            return rejected()

        # Cost of the current (pre-transition) state.
        cost_state_unit = InvasiveUtility.get_unit_invaded_reaches(
            self.state, self.simulationParameterObj.habitatSize) * self.actionParameterObj.costPerReach
        stateCost = cost_state_unit + InvasiveUtility.get_invaded_reaches(
            self.state) * self.actionParameterObj.costPerTree
        stateCost = stateCost + InvasiveUtility.get_empty_slots(self.state) * self.actionParameterObj.emptyCost

        costAction = InvasiveUtility.get_budget_cost_actions(action, self.state, self.actionParameterObj)
        if costAction > self.actionParameterObj.budget:
            return rejected()

        self.state = simulateNextState(self.state, action, self.simulationParameterObj,
            self.actionParameterObj, self.dispertionTable, self.germinationObj)

        theObs = Observation()
        theObs.intArray = self.state
        returnRO = Reward_observation_terminal()
        returnRO.r = -1 * (costAction + stateCost)
        returnRO.o = theObs
        return returnRO
    def __init__(self, simulationParameterObj, actionParameterObj, Bad_Action_Penalty, nbrReaches=REACHES, habitatSize=HABITATS, fixedStartState=False,
                 discountFactor=0.9, seed=None):
        """
        Set up the environment from supplied parameter objects or from defaults.

        When ``simulationParameterObj`` is None a random domain is generated
        and default action parameters are used; ``actionParameterObj`` is
        ignored in that case.  ``nbrReaches`` and ``habitatSize`` are only
        used when the domain is generated here.

        :param simulationParameterObj (SimulationParameterClass), contains all the parameters for the domain
        :param actionParameterObj (ActionParameterClass), contains all the parameters for the actions
        :param Bad_Action_Penalty (float), a negative value which will be returned as the consequence of over-budget
        action or non-allowable action on a state
        :param nbrReaches (int), number of reaches in the river network
        :param habitatSize (int), number of habitat in each reach
        :param fixedStartState (bool), indicates using a random starting state or fixed starting state
        :param discountFactor (float), discount factor
        :param seed (int), seed for random number generator (default=None)
        """
        self.seed = seed
        self.fixedStartState = fixedStartState
        self.discountFactor = discountFactor
        self.Bad_Action_Penalty = Bad_Action_Penalty
        # random.Random(None) seeds itself exactly like random.Random().
        self.randGenerator = random.Random(self.seed)
        # Placeholders defined on every construction path so the attributes
        # always exist; expected to be overwritten later (e.g. by env_init).
        self.dispertionTable = []
        self.germinationObj = None

        if simulationParameterObj is not None:
            self.simulationParameterObj = simulationParameterObj
            self.actionParameterObj = actionParameterObj
            return

        # ---- default domain -------------------------------------------------
        upStreamRate = 0.1
        downStreamRate = 0.5
        exogenousArrivalIndicator = SimulationParameterClass.ExogenousArrivalOn
        # competition parameter
        competitionFactor = 1
        # One (native, tamarisk) pair per reach.
        # NOTE(review): these draws use the module-level `random`, not
        # self.randGenerator, so `seed` does not make them reproducible.
        reachArrivalRates = array([[random.randint(100, 1000) for i in xrange(2)] for i in xrange(nbrReaches)])
        reachArrivalProbs = array([[random.random() for i in xrange(2)] for i in xrange(nbrReaches)])
        # first value is for native and the second one for tamarisk
        prodRate = [200, 200]
        # first value is for native and the second one for tamarisk
        deathRate = [0.2, 0.2]
        graph = InvasiveUtility.createRandomGraph(nbrReaches + 1, balanced=True, randGenerator=self.randGenerator)

        self.simulationParameterObj = SimulationParameterClass(nbrReaches, habitatSize, prodRate, deathRate,
            exogenousArrivalIndicator, reachArrivalRates, reachArrivalProbs, upStreamRate, downStreamRate,
            competitionFactor, graph)

        self.actionParameterObj = ActionParameterClass(costPerTree=0.1, eradicationCost=0.5, restorationCost=0.9,
            eradicationRate=1, restorationRate=1,
            costPerReach=10, emptyCost=0.05, varEradicationCost=0.4, varInvasiveRestorationCost=0.8,
            varEmptyRestorationCost=0.4, budget=100)
Ejemplo n.º 6
0
 def egreedy(self, state):
     """
     Select an action for ``state`` with an epsilon-greedy rule.

     On the first visit of a state its allowed actions are computed and its
     Q-values are initialised to 0.0 (one per allowed action).  Unless
     ``self.exploringFrozen`` is set, a random index is drawn when
     ``self.randGenerator.random()`` falls below ``self.sarsa_epsilon``;
     otherwise the first index holding the maximal Q-value is used.

     :param state: state handed to ``SamplingUtility.getStateId`` and
         ``InvasiveUtility.getActions``
     :return: the allowed action at the selected index
     """
     stateId = SamplingUtility.getStateId(state)
     # `not in` covers the empty-table case too, and works without the
     # Python-2-only dict.has_key.
     if stateId not in self.Q_value_function:
         self.all_allowed_actions[stateId] = InvasiveUtility.getActions(state, self.nbrReaches, self.habitatSize)
         self.Q_value_function[stateId] = len(self.all_allowed_actions[stateId]) * [0.0]

     actions = self.all_allowed_actions[stateId]
     qValues = self.Q_value_function[stateId]
     if not self.exploringFrozen and self.randGenerator.random() < self.sarsa_epsilon:
         # explore
         index = self.randGenerator.randint(0, len(actions) - 1)
     else:
         # exploit
         index = qValues.index(max(qValues))
     return actions[index]
Ejemplo n.º 7
0
    def random_player(self, state):
        """
        Pick an allowed action for ``state`` at a randomly drawn list position.

        Every call stores the result of ``InvasiveUtility.getActions`` in
        ``self.all_allowed_actions`` under the id of ``state`` and then draws
        an index into that list with ``self.randGenerator.randint``.

        :param state: state whose allowed actions are looked up
        :return: the allowed action at the drawn index
        """
        table = self.all_allowed_actions
        sid = SamplingUtility.getStateId(state)
        table[sid] = InvasiveUtility.getActions(state, self.nbrReaches, self.habitatSize)

        upper = len(table[sid]) - 1
        drawn = self.randGenerator.randint(0, upper)
        return table[sid][drawn]
Ejemplo n.º 8
0
    def __init__(self, simulationParameterObj, actionParameterObj, Bad_Action_Penalty, nbrReaches=7, habitatSize=4, fixedStartState=False,
                 discountFactor=0.9, seed=None):
        """
        Set up the environment from supplied parameter objects or from defaults.

        When ``simulationParameterObj`` is None a random domain is generated
        and default action parameters are used; ``actionParameterObj`` is
        ignored in that case.  ``nbrReaches`` and ``habitatSize`` are only
        used when the domain is generated here.

        :param simulationParameterObj (SimulationParameterClass), contains all the parameters for the domain
        :param actionParameterObj (ActionParameterClass), contains all the parameters for the actions
        :param Bad_Action_Penalty (float), a negative value which will be returned as the consequence of over-budget
        action or non-allowable action on a state
        :param nbrReaches (int), number of reaches in the river network
        :param habitatSize (int), number of habitat in each reach
        :param fixedStartState (bool), indicates using a random starting state or fixed starting state
        :param discountFactor (float), discount factor
        :param seed (int), seed for random number generator (default=None)
        """
        self.seed = seed
        self.fixedStartState = fixedStartState
        self.discountFactor = discountFactor
        self.Bad_Action_Penalty = Bad_Action_Penalty
        # random.Random(None) seeds itself exactly like random.Random().
        self.randGenerator = random.Random(self.seed)
        # Placeholders defined on every construction path so the attributes
        # always exist; expected to be overwritten later (e.g. by env_init).
        self.dispertionTable = []
        self.germinationObj = None

        if simulationParameterObj is not None:
            self.simulationParameterObj = simulationParameterObj
            self.actionParameterObj = actionParameterObj
            return

        # ---- default domain -------------------------------------------------
        upStreamRate = 0.1
        downStreamRate = 0.5
        exogenousArrivalIndicator = SimulationParameterClass.ExogenousArrivalOn
        # competition parameter
        competitionFactor = 1
        # One (native, tamarisk) pair per reach.
        # NOTE(review): these draws use the module-level `random`, not
        # self.randGenerator, so `seed` does not make them reproducible.
        reachArrivalRates = array([[random.randint(100, 1000) for i in xrange(2)] for i in xrange(nbrReaches)])
        reachArrivalProbs = array([[random.random() for i in xrange(2)] for i in xrange(nbrReaches)])
        # first value is for native and the second one for tamarisk
        prodRate = [200, 200]
        # first value is for native and the second one for tamarisk
        deathRate = [0.2, 0.2]
        graph = InvasiveUtility.createRandomGraph(nbrReaches + 1, balanced=True, randGenerator=self.randGenerator)

        self.simulationParameterObj = SimulationParameterClass(nbrReaches, habitatSize, prodRate, deathRate,
            exogenousArrivalIndicator, reachArrivalRates, reachArrivalProbs, upStreamRate, downStreamRate,
            competitionFactor, graph)
        self.actionParameterObj = ActionParameterClass(costPerTree=0.1, eradicationCost=0.5, restorationCost=0.9,
            eradicationRate=1, restorationRate=1,
            costPerReach=1, emptyCost=0, varEradicationCost=0.5, varInvasiveRestorationCost=0.1,
            varEmptyRestorationCost=0, budget=100)
Ejemplo n.º 9
0
    def agent_step(self, reward, observation):
        """
        Process one environment step and choose the next action (SARSA).

        If ``reward`` equals ``self.Bad_Action_Penalty`` the previous action
        is removed from the allowed actions of the previous state (together
        with its Q-value), a new action is chosen for that same state, and no
        Q-value is updated.  Otherwise the Q-value of the previous
        state/action pair is moved towards
        ``reward + sarsa_gamma * Q(new state, new action)`` with step size
        ``self.sarsa_stepsize``, unless ``self.policyFrozen`` is set.

        :param reward: reward received for the previous action
        :param observation: RL-Glue observation; ``intArray`` holds the new state
        :return: ``Action`` whose ``intArray`` holds the chosen action
        """
        lastState = self.lastObservation.intArray
        lastAction = self.lastAction.intArray
        lastStateId = SamplingUtility.getStateId(lastState)
        lastActionIdx = self.all_allowed_actions[lastStateId].index(tuple(lastAction))

        if reward == self.Bad_Action_Penalty:
            # The environment rejected the action: forget it and retry from
            # the same state (self.lastObservation stays as it is).
            self.all_allowed_actions[lastStateId].pop(lastActionIdx)
            self.Q_value_function[lastStateId].pop(lastActionIdx)
            newAction = self.egreedy(lastState)
            print(InvasiveUtility.get_budget_cost_actions(lastAction, lastState, self.actionParameterObj))
            # Same tuple -> list conversion as on the regular path below.
            if isinstance(newAction, tuple):
                newAction = list(newAction)
            returnAction = Action()
            returnAction.intArray = newAction
            self.lastAction = copy.deepcopy(returnAction)
            return returnAction

        newState = observation.intArray
        newAction = self.egreedy(newState)
        if isinstance(newAction, tuple):
            newAction = list(newAction)

        newStateId = SamplingUtility.getStateId(newState)
        newActionIdx = self.all_allowed_actions[newStateId].index(tuple(newAction))

        Q_sa = self.Q_value_function[lastStateId][lastActionIdx]
        Q_sprime_aprime = self.Q_value_function[newStateId][newActionIdx]
        new_Q_sa = Q_sa + self.sarsa_stepsize * (reward + self.sarsa_gamma * Q_sprime_aprime - Q_sa)
        if not self.policyFrozen:
            self.Q_value_function[lastStateId][lastActionIdx] = new_Q_sa

        returnAction = Action()
        returnAction.intArray = newAction
        self.lastAction = copy.deepcopy(returnAction)
        self.lastObservation = copy.deepcopy(observation)
        return returnAction
Ejemplo n.º 10
0
    def env_step(self, action):
        """
        Apply ``action`` to the current state and return reward and observation.

        An action that is not allowable in the current state, or whose cost
        exceeds ``self.actionParameterObj.budget``, leaves ``self.state``
        unchanged and yields ``self.Bad_Action_Penalty`` with the observation
        ``[-1]``.  Otherwise the next state is simulated and stored, and the
        reward is ``-(action cost + state cost)``, both costs being computed
        on the state before the transition.

        :param action: RL-Glue action whose ``intArray`` holds one integer per reach
        :return: ``Reward_observation_terminal`` with ``r`` and ``o`` set
        :raises AssertionError: if the action length differs from the number of reaches
        """
        action = action.intArray
        simParams = self.simulationParameterObj
        actParams = self.actionParameterObj
        assert len(action) == simParams.nbrReaches, "Expected " + str(
            simParams.nbrReaches) + " integer action."

        def penalty_result():
            # Result returned for an action that cannot be executed.
            theObs = Observation()
            theObs.intArray = [-1]
            returnRO = Reward_observation_terminal()
            returnRO.r = self.Bad_Action_Penalty
            returnRO.o = theObs
            return returnRO

        if not InvasiveUtility.is_action_allowable(action, self.state):
            return penalty_result()

        # Cost of the current (pre-transition) state.
        cost_state_unit = InvasiveUtility.get_unit_invaded_reaches(
            self.state, simParams.habitatSize) * actParams.costPerReach
        stateCost = cost_state_unit + InvasiveUtility.get_invaded_reaches(self.state) * actParams.costPerTree
        stateCost = stateCost + InvasiveUtility.get_empty_slots(self.state) * actParams.emptyCost

        costAction = InvasiveUtility.get_budget_cost_actions(action, self.state, actParams)
        if costAction > actParams.budget:
            return penalty_result()

        self.state = simulateNextState(self.state, action, simParams,
            actParams, self.dispertionTable, self.germinationObj)

        theObs = Observation()
        theObs.intArray = self.state
        returnRO = Reward_observation_terminal()
        returnRO.r = -1 * (costAction + stateCost)
        returnRO.o = theObs
        return returnRO
    def __init__(self, simulationParameterObj, actionParameterObj, Bad_Action_Penalty, nbrReaches=7, habitatSize=4, fixedStartState=False,
                 discountFactor=0.9, seed=None):
        """
        Set up the environment and append the configuration to the analysis log.

        When ``simulationParameterObj`` is None a random domain is generated,
        its main parameters are printed, and default action parameters are
        used; ``actionParameterObj`` is ignored in that case.  On every path
        the reach/slot counts and the up/down-stream rates are appended to
        ``dispersionMatrixAnalysis.txt``.

        :param simulationParameterObj (SimulationParameterClass), contains all the parameters for the domain
        :param actionParameterObj (ActionParameterClass), contains all the parameters for the actions
        :param Bad_Action_Penalty (float), a negative value which will be returned as the consequence of over-budget
        action or non-allowable action on a state
        :param nbrReaches (int), number of reaches in the river network
        :param habitatSize (int), number of habitat in each reach
        :param fixedStartState (bool), indicates using a random starting state or fixed starting state
        :param discountFactor (float), discount factor
        :param seed (int), seed for random number generator (default=None)
        """
        self.seed = seed
        self.fixedStartState = fixedStartState
        self.discountFactor = discountFactor
        self.Bad_Action_Penalty = Bad_Action_Penalty
        # random.Random(None) seeds itself exactly like random.Random().
        self.randGenerator = random.Random(self.seed)
        # env_step reads `self.dispertionTable`; the `dispersionTable` spelling
        # previously set here is kept as well in case other code relies on it.
        self.dispertionTable = []
        self.dispersionTable = self.dispertionTable
        self.germinationObj = None

        if simulationParameterObj is not None:
            self.simulationParameterObj = simulationParameterObj
            self.actionParameterObj = actionParameterObj
            # Needed for the log below; these locals used to exist only on the
            # default path, which raised NameError here.
            upStreamRate = simulationParameterObj.upStreamRate
            downStreamRate = simulationParameterObj.downStreamRate
        else:
            # ======================== PARAMETERS =============================
            upStreamRate = 0.1
            downStreamRate = 0.5
            # exogenous arrival indicator (alternative: ExogenousArrivalOff)
            exogenousArrivalIndicator = SimulationParameterClass.ExogenousArrivalOn
            # competition parameter
            competitionFactor = 1
            # One (native, tamarisk) pair per reach.
            # NOTE(review): these draws use the module-level `random`, not
            # self.randGenerator, so `seed` does not make them reproducible.
            reachArrivalRates = array([[random.randint(100, 1000) for i in xrange(2)] for i in xrange(nbrReaches)])
            reachArrivalProbs = array([[random.random() for i in xrange(2)] for i in xrange(nbrReaches)])
            # first value is for native and the second one for tamarisk
            prodRate = [200, 200]
            # first value is for native and the second one for tamarisk
            deathRate = [0.2, 0.2]
            # =================================================================

            print("arrival rates (N, T)")
            print(reachArrivalRates[0])
            print("arrival probs (N,T)")
            print(reachArrivalProbs[0])
            print("downstreamrate")
            print(downStreamRate)
            print("prodRate")
            print(prodRate)
            print("deathRate")
            print(deathRate)
            graph = InvasiveUtility.createRandomGraph(nbrReaches + 1, balanced=True, randGenerator=self.randGenerator)

            self.simulationParameterObj = SimulationParameterClass(nbrReaches, habitatSize, prodRate, deathRate,
                exogenousArrivalIndicator, reachArrivalRates, reachArrivalProbs, upStreamRate, downStreamRate,
                competitionFactor, graph)

            # ======================== PARAMETERS =============================
            self.actionParameterObj = ActionParameterClass(costPerTree=0.1, eradicationCost=0.5, restorationCost=0.9,
                eradicationRate=0.85, restorationRate=0.65,
                costPerReach=10, emptyCost=0.5, varEradicationCost=0.4, varInvasiveRestorationCost=0.8,
                varEmptyRestorationCost=0.4, budget=100)
            # =================================================================

        # Append the configuration to the analysis log; `with` closes the file
        # even if a write fails.
        # NOTE(review): nbrReaches/habitatSize are the constructor arguments,
        # which may differ from a supplied simulationParameterObj - confirm.
        with open("dispersionMatrixAnalysis.txt", "a+") as testResults:
            testResults.write("\n Number of reaches: " + str(nbrReaches) + "\n")
            testResults.write(" Number of slots: " + str(habitatSize) + "\n")
            testResults.write(" Upstram-Rate: " + str(upStreamRate) + "\n")
            testResults.write(" Downstream-Rate: " + str(downStreamRate) + "\n \n")