def env_init(self):
    """Initialize the Levin dispersion model and return the RL-Glue task spec.

    Builds the dispersion probability table from the (undirected) river
    network, computes the worst-case (fully invaded) state/action cost to
    bound the reward range, and returns the RL-Glue 3.0 task-specification
    string describing observations, actions, rewards and extras.

    :return (str), the RL-Glue task specification.
    """
    self.dispersionModel = InvasiveUtility.Levin
    # Dispersion is computed on the undirected version of the river network.
    notDirectedG = networkx.Graph(self.simulationParameterObj.graph)
    adjMatrix = adjacency_matrix(notDirectedG)
    edges = self.simulationParameterObj.graph.edges()
    simulationParameterObj = self.simulationParameterObj
    if self.dispersionModel == InvasiveUtility.Levin:
        # Path-based dispersion parameters, normalized by the Levin constant C
        # derived from the upstream/downstream rates.
        parameters = InvasiveUtility.calculatePath(notDirectedG, adjMatrix, edges,
            simulationParameterObj.downStreamRate, simulationParameterObj.upStreamRate)
        C = (1 - simulationParameterObj.upStreamRate * simulationParameterObj.downStreamRate) / (
            (1 - 2 * simulationParameterObj.upStreamRate) * (1 - simulationParameterObj.downStreamRate))
        self.dispertionTable = np.dot(1 / C, parameters)
        self.germinationObj = GerminationDispersionParameterClass(1, 1)
    # Worst case: every habitat slot of every reach fully invaded (value 1);
    # used to bound the most negative achievable reward.
    worst_case = repmat(1, 1, self.simulationParameterObj.nbrReaches * self.simulationParameterObj.habitatSize)[0]
    cost_state_unit = InvasiveUtility.get_unit_invaded_reaches(worst_case,
        self.simulationParameterObj.habitatSize) * self.actionParameterObj.costPerReach
    stateCost = cost_state_unit + InvasiveUtility.get_invaded_reaches(
        worst_case) * self.actionParameterObj.costPerTree
    stateCost = stateCost + InvasiveUtility.get_empty_slots(worst_case) * self.actionParameterObj.emptyCost
    # Most expensive action (code 3) applied to every reach of the fully
    # invaded state gives the maximal action cost.
    costAction = InvasiveUtility.get_budget_cost_actions(
        repmat(3, 1, self.simulationParameterObj.nbrReaches)[0], worst_case, self.actionParameterObj)
    # NOTE: a stray `networkx.adjacency_matrix(self.simulationParameterObj.graph)`
    # call whose result was discarded has been removed (dead code).
    return "VERSION RL-Glue-3.0 PROBLEMTYPE non-episodic DISCOUNTFACTOR " + str(
        self.discountFactor) + " OBSERVATIONS INTS (" + str(
        self.simulationParameterObj.nbrReaches * self.simulationParameterObj.habitatSize) + " 1 3) ACTIONS INTS (" + str(
        self.simulationParameterObj.nbrReaches) + " 1 4) REWARDS (" + str(self.Bad_Action_Penalty) + " " + str(
        -1 * (costAction + stateCost)) + ") EXTRA " + str(
        self.simulationParameterObj.graph.edges()) + " BUDGET " + str(
        self.actionParameterObj.budget) + " by Majid Taleghan."
def random_player(self, state):
    """Select a uniformly random allowable action for *state* (Q-values ignored)."""
    key = SamplingUtility.getStateId(state)
    # Refresh the cached allowable-action list for this state id.
    allowed = InvasiveUtility.getActions(state, self.nbrReaches, self.habitatSize)
    self.all_allowed_actions[key] = allowed
    # randint is inclusive on both ends, so this covers every entry.
    pick = self.randGenerator.randint(0, len(allowed) - 1)
    return allowed[pick]
def env_step(self, action):
    """Advance the environment by one step.

    :param action, RL-Glue Action whose intArray holds one action code per reach.
    :return (Reward_observation_terminal), reward = -(state cost + action cost)
        and the next state as observation; when the action is not allowable or
        exceeds the budget, reward = Bad_Action_Penalty and observation = [-1].
    """
    action = action.intArray
    assert len(action) == self.simulationParameterObj.nbrReaches, "Expected " + str(
        self.simulationParameterObj.nbrReaches) + " integer action."

    def penalty_response():
        # Invalid action: sentinel observation [-1] with the fixed penalty reward.
        theObs = Observation()
        theObs.intArray = [-1]
        returnRO = Reward_observation_terminal()
        returnRO.r = self.Bad_Action_Penalty
        returnRO.o = theObs
        return returnRO

    if not InvasiveUtility.is_action_allowable(action, self.state):
        return penalty_response()
    # State cost: per-reach cost for invaded reaches, per-tree cost for invaded
    # slots, plus a cost for empty slots.
    cost_state_unit = InvasiveUtility.get_unit_invaded_reaches(self.state,
        self.simulationParameterObj.habitatSize) * self.actionParameterObj.costPerReach
    stateCost = cost_state_unit + InvasiveUtility.get_invaded_reaches(
        self.state) * self.actionParameterObj.costPerTree
    stateCost = stateCost + InvasiveUtility.get_empty_slots(self.state) * self.actionParameterObj.emptyCost
    costAction = InvasiveUtility.get_budget_cost_actions(action, self.state, self.actionParameterObj)
    if costAction > self.actionParameterObj.budget:
        return penalty_response()
    # Stochastic transition driven by the dispersion table and germination model.
    nextState = simulateNextState(self.state, action, self.simulationParameterObj,
        self.actionParameterObj, self.dispertionTable, self.germinationObj)
    self.state = nextState
    theObs = Observation()
    theObs.intArray = self.state
    returnRO = Reward_observation_terminal()
    returnRO.r = -1 * (costAction + stateCost)
    returnRO.o = theObs
    return returnRO
def __init__(self, simulationParameterObj, actionParameterObj, Bad_Action_Penalty, nbrReaches=REACHES,
        habitatSize=HABITATS, fixedStartState=False, discountFactor=0.9, seed=None):
    """
    :param simulationParameterObj (SimulationParameterClass), contains all the parameters for the domain
    :param actionParameterObj (ActionParameterClass), contains all the parameters for the actions
    :param Bad_Action_Penalty (float), a negative value which will be returned as the consequence of
        over-budget action or non-allowable action on a state
    :param nbrReaches (int), number of reaches in the river network
    :param habitatSize (int), number of habitat in each reach
    :param fixedStartState (bool), indicates using a random starting state or fixed starting state
    :param discountFactor (float), discount factor
    :param seed (int), seed for random number generator (default=None)
    """
    self.seed = seed
    self.fixedStartState = fixedStartState
    self.discountFactor = discountFactor
    self.Bad_Action_Penalty = Bad_Action_Penalty
    # Seeded generator gives reproducible runs; otherwise use system entropy.
    if self.seed is not None:
        self.randGenerator = random.Random(self.seed)
    else:
        self.randGenerator = random.Random()
    if simulationParameterObj is not None:
        # Caller supplied the full parameterization; the dispersion table is
        # filled in later (by env_init).
        self.simulationParameterObj = simulationParameterObj
        self.actionParameterObj = actionParameterObj
        self.dispertionTable = []
        self.germinationObj = None
    else:
        # Default parameterization with a random river network.
        upStreamRate = 0.1
        downStreamRate = 0.5
        exogenousArrivalIndicator = SimulationParameterClass.ExogenousArrivalOn
        competitionFactor = 1
        # Per-reach arrival rates/probabilities; column 0 native, column 1 tamarisk.
        # NOTE(review): these use the module-level `random`, not self.randGenerator,
        # so they are NOT reproducible via `seed` — confirm whether intended.
        reachArrivalRates = array([[random.randint(100, 1000) for i in xrange(2)] for i in xrange(nbrReaches)])
        reachArrivalProbs = array([[random.random() for i in xrange(2)] for i in xrange(nbrReaches)])
        # First value is for native and the second one for tamarisk.
        prodRate = [200, 200]
        deathRate = [0.2, 0.2]
        graph = InvasiveUtility.createRandomGraph(nbrReaches + 1, balanced=True, randGenerator=self.randGenerator)
        self.simulationParameterObj = SimulationParameterClass(nbrReaches, habitatSize, prodRate, deathRate,
            exogenousArrivalIndicator, reachArrivalRates, reachArrivalProbs, upStreamRate, downStreamRate,
            competitionFactor, graph)
        self.actionParameterObj = ActionParameterClass(costPerTree=0.1, eradicationCost=0.5, restorationCost=0.9,
            eradicationRate=1, restorationRate=1, costPerReach=10, emptyCost=0.05, varEradicationCost=0.4,
            varInvasiveRestorationCost=0.8, varEmptyRestorationCost=0.4, budget=100)
def egreedy(self, state):
    """Epsilon-greedy action selection among the allowable actions of *state*.

    :param state, the current observation (one int per habitat slot)
    :return the chosen action from all_allowed_actions[stateId]
    """
    stateId = SamplingUtility.getStateId(state)
    # Lazily initialize allowed actions and Q-values for unseen states.
    # (`in` replaces deprecated dict.has_key; the former `len(...) == 0 or`
    # guard was redundant — an empty dict contains no key.)
    if stateId not in self.Q_value_function:
        self.all_allowed_actions[stateId] = InvasiveUtility.getActions(state, self.nbrReaches, self.habitatSize)
        self.Q_value_function[stateId] = len(self.all_allowed_actions[stateId]) * [0.0]
    if not self.exploringFrozen and self.randGenerator.random() < self.sarsa_epsilon:
        # Explore: uniform random over the allowed actions.
        index = self.randGenerator.randint(0, len(self.all_allowed_actions[stateId]) - 1)
    else:
        # Exploit: first action achieving the maximal Q-value.
        index = self.Q_value_function[stateId].index(max(self.Q_value_function[stateId]))
    return self.all_allowed_actions[stateId][index]
def random_player(self, state):
    """Return one allowable action for *state*, drawn uniformly at random.

    Rebuilds and caches the state's allowable-action list, then indexes it
    with the environment's random generator.
    """
    state_key = SamplingUtility.getStateId(state)
    actions = InvasiveUtility.getActions(state, self.nbrReaches, self.habitatSize)
    self.all_allowed_actions[state_key] = actions
    last_index = len(actions) - 1
    chosen = self.randGenerator.randint(0, last_index)
    return actions[chosen]
def __init__(self, simulationParameterObj, actionParameterObj, Bad_Action_Penalty, nbrReaches=7, habitatSize=4,
        fixedStartState=False, discountFactor=0.9, seed=None):
    """
    :param simulationParameterObj (SimulationParameterClass), contains all the parameters for the domain
    :param actionParameterObj (ActionParameterClass), contains all the parameters for the actions
    :param Bad_Action_Penalty (float), a negative value which will be returned as the consequence of
        over-budget action or non-allowable action on a state
    :param nbrReaches (int), number of reaches in the river network
    :param habitatSize (int), number of habitat in each reach
    :param fixedStartState (bool), indicates using a random starting state or fixed starting state
    :param discountFactor (float), discount factor
    :param seed (int), seed for random number generator (default=None)
    """
    self.seed = seed
    self.fixedStartState = fixedStartState
    self.discountFactor = discountFactor
    self.Bad_Action_Penalty = Bad_Action_Penalty
    # Seeded generator gives reproducible runs; otherwise use system entropy.
    if self.seed is not None:
        self.randGenerator = random.Random(self.seed)
    else:
        self.randGenerator = random.Random()
    if simulationParameterObj is not None:
        # Caller supplied the full parameterization; the dispersion table is
        # filled in later (by env_init).
        self.simulationParameterObj = simulationParameterObj
        self.actionParameterObj = actionParameterObj
        self.dispertionTable = []
        self.germinationObj = None
    else:
        # Default parameterization with a random river network.
        upStreamRate = 0.1
        downStreamRate = 0.5
        exogenousArrivalIndicator = SimulationParameterClass.ExogenousArrivalOn
        competitionFactor = 1
        # Per-reach arrival rates/probabilities; column 0 native, column 1 tamarisk.
        # NOTE(review): these use the module-level `random`, not self.randGenerator,
        # so they are NOT reproducible via `seed` — confirm whether intended.
        reachArrivalRates = array([[random.randint(100, 1000) for i in xrange(2)] for i in xrange(nbrReaches)])
        reachArrivalProbs = array([[random.random() for i in xrange(2)] for i in xrange(nbrReaches)])
        # First value is for native and the second one for tamarisk.
        prodRate = [200, 200]
        deathRate = [0.2, 0.2]
        graph = InvasiveUtility.createRandomGraph(nbrReaches + 1, balanced=True, randGenerator=self.randGenerator)
        self.simulationParameterObj = SimulationParameterClass(nbrReaches, habitatSize, prodRate, deathRate,
            exogenousArrivalIndicator, reachArrivalRates, reachArrivalProbs, upStreamRate, downStreamRate,
            competitionFactor, graph)
        self.actionParameterObj = ActionParameterClass(costPerTree=0.1, eradicationCost=0.5, restorationCost=0.9,
            eradicationRate=1, restorationRate=1, costPerReach=1, emptyCost=0, varEradicationCost=0.5,
            varInvasiveRestorationCost=0.1, varEmptyRestorationCost=0, budget=100)
def agent_step(self, reward, observation):
    """One SARSA step: update Q(lastState, lastAction) and choose the next action.

    :param reward (float), reward for the previous transition; equal to
        Bad_Action_Penalty when the previous action was invalid/over budget.
    :param observation, the new state observation (intArray)
    :return (Action), the next action to take
    """
    lastState = self.lastObservation.intArray
    lastAction = self.lastAction.intArray
    lastStateId = SamplingUtility.getStateId(lastState)
    lastActionIdx = self.all_allowed_actions[lastStateId].index(tuple(lastAction))
    if reward == self.Bad_Action_Penalty:
        # Previous action was invalid: drop it from the state's candidate set
        # and its Q entry, then re-select from the SAME observation.
        # (A leftover debug `print get_budget_cost_actions(...)` was removed.)
        self.all_allowed_actions[lastStateId].pop(lastActionIdx)
        self.Q_value_function[lastStateId].pop(lastActionIdx)
        newAction = self.egreedy(self.lastObservation.intArray)
        returnAction = Action()
        returnAction.intArray = newAction
        self.lastAction = copy.deepcopy(returnAction)
        return returnAction
    newState = observation.intArray
    newAction = self.egreedy(newState)
    if type(newAction) is tuple:
        newAction = list(newAction)
    # Hoist the repeated state-id computations.
    newStateId = SamplingUtility.getStateId(newState)
    Q_sa = self.Q_value_function[lastStateId][lastActionIdx]
    Q_sprime_aprime = self.Q_value_function[newStateId][
        self.all_allowed_actions[newStateId].index(tuple(newAction))]
    # SARSA update: Q(s,a) += alpha * (r + gamma * Q(s',a') - Q(s,a))
    new_Q_sa = Q_sa + self.sarsa_stepsize * (reward + self.sarsa_gamma * Q_sprime_aprime - Q_sa)
    if not self.policyFrozen:
        self.Q_value_function[lastStateId][
            self.all_allowed_actions[lastStateId].index(tuple(lastAction))] = new_Q_sa
    returnAction = Action()
    returnAction.intArray = newAction
    self.lastAction = copy.deepcopy(returnAction)
    self.lastObservation = copy.deepcopy(observation)
    return returnAction
def env_step(self, action):
    """Advance the environment by one step.

    :param action, RL-Glue Action whose intArray holds one action code per reach.
    :return (Reward_observation_terminal), reward = -(state cost + action cost)
        and the next state as observation; when the action is not allowable or
        exceeds the budget, reward = Bad_Action_Penalty and observation = [-1].
    """
    action = action.intArray
    assert len(action) == self.simulationParameterObj.nbrReaches, "Expected " + str(
        self.simulationParameterObj.nbrReaches) + " integer action."
    # Not allowable on the current state: return the penalty response.
    # (Two duplicate `is_action_allowable` calls whose results were discarded
    # have been removed as dead code.)
    if not InvasiveUtility.is_action_allowable(action, self.state):
        theObs = Observation()
        theObs.intArray = [-1]
        returnRO = Reward_observation_terminal()
        returnRO.r = self.Bad_Action_Penalty
        returnRO.o = theObs
        return returnRO
    # State cost: per-reach cost for invaded reaches, per-tree cost for invaded
    # slots, plus a cost for empty slots.
    cost_state_unit = InvasiveUtility.get_unit_invaded_reaches(self.state,
        self.simulationParameterObj.habitatSize) * self.actionParameterObj.costPerReach
    stateCost = cost_state_unit + InvasiveUtility.get_invaded_reaches(
        self.state) * self.actionParameterObj.costPerTree
    stateCost = stateCost + InvasiveUtility.get_empty_slots(self.state) * self.actionParameterObj.emptyCost
    costAction = InvasiveUtility.get_budget_cost_actions(action, self.state, self.actionParameterObj)
    # Over budget: same penalty response as a non-allowable action.
    if costAction > self.actionParameterObj.budget:
        theObs = Observation()
        theObs.intArray = [-1]
        returnRO = Reward_observation_terminal()
        returnRO.r = self.Bad_Action_Penalty
        returnRO.o = theObs
        return returnRO
    # Stochastic transition driven by the dispersion table and germination model.
    nextState = simulateNextState(self.state, action, self.simulationParameterObj,
        self.actionParameterObj, self.dispertionTable, self.germinationObj)
    self.state = nextState
    theObs = Observation()
    theObs.intArray = self.state
    returnRO = Reward_observation_terminal()
    returnRO.r = -1 * (costAction + stateCost)
    returnRO.o = theObs
    return returnRO
def __init__(self, simulationParameterObj, actionParameterObj, Bad_Action_Penalty, nbrReaches=7, habitatSize=4, fixedStartState=False, discountFactor=0.9, seed=None): """ :param simulationParameterObj (SimulationParameterClass), contains all the parameters for the domain :param actionParameterObj (ActionParameterClass), contains all the parameters for the actions :param Bad_Action_Penalty (float), a negative value which will be returned as the consequence of over-budget action or non-allowable action on a state :param nbrReaches (int), number of reaches in the river network :param habitatSize (int), number of habitat in each reach :param fixedStartState (bool), indicates using a random starting state or fixed starting state :param discountFactor (float), discount factor :param seed (int), seed for random number generator (default=None) """ self.seed = seed self.fixedStartState = fixedStartState self.discountFactor = discountFactor self.Bad_Action_Penalty=Bad_Action_Penalty if not self.seed is None: self.randGenerator = random.Random(self.seed) else: self.randGenerator = random.Random() if simulationParameterObj != None: self.simulationParameterObj = simulationParameterObj self.actionParameterObj = actionParameterObj self.dispersionTable = [] self.germinationObj = None else: # ============================ PARAMETERS ===================================== #upstream rate upStreamRate = 0.1 #downstream rate downStreamRate = 0.5 #exogenous arrival indicator exogenousArrivalIndicator = SimulationParameterClass.ExogenousArrivalOn #exogenousArrivalIndicator = SimulationParameterClass.ExogenousArrivalOff #competiton parameter competitionFactor = 1 #there is the same number of reachArrivalRates = array([[random.randint(100, 1000) for i in xrange(2)] for i in xrange(nbrReaches)]) reachArrivalProbs = array([[random.random() for i in xrange(2)] for i in xrange(nbrReaches)]) #first value is for native and the second one for tamarisk prodRate = [200, 200] #first value is for 
native and the second one for tamarisk deathRate = [0.2, 0.2] # ============================ PARAMETERS ===================================== print "arrival rates (N, T)" print reachArrivalRates[0] print "arrival probs (N,T)" print reachArrivalProbs[0] print "downstreamrate" print downStreamRate print "prodRate" print prodRate print "deathRate" print deathRate graph = InvasiveUtility.createRandomGraph(nbrReaches + 1, balanced=True,randGenerator=self.randGenerator) self.simulationParameterObj = SimulationParameterClass(nbrReaches, habitatSize, prodRate, deathRate, exogenousArrivalIndicator, reachArrivalRates, reachArrivalProbs, upStreamRate, downStreamRate, competitionFactor, graph) # ============================ PARAMETERS ===================================== self.actionParameterObj = ActionParameterClass(costPerTree=0.1, eradicationCost=0.5, restorationCost=0.9, eradicationRate=0.85, restorationRate=0.65, costPerReach=10, emptyCost=0.5, varEradicationCost=0.4, varInvasiveRestorationCost=0.8, varEmptyRestorationCost=0.4, budget=100) # ============================ PARAMETERS ===================================== testResults = open("dispersionMatrixAnalysis.txt", "a+") testResults.write("\n Number of reaches: "+str(nbrReaches)+"\n"); testResults.write(" Number of slots: "+str(habitatSize)+"\n"); testResults.write(" Upstram-Rate: "+str(upStreamRate)+"\n"); testResults.write(" Downstream-Rate: "+str(downStreamRate)+"\n \n"); testResults.close();