def env_init(self):
    """Initialize the environment using the Levin dispersion model.

    Precomputes the dispersion probability table (``self.dispertionTable``)
    from the river-network graph, then evaluates the cost of the worst case
    (fully invaded network, most expensive action everywhere) so the reward
    range can be reported to the RL-Glue agent.

    Returns:
        str: RL-Glue 3.0 task specification describing observation/action
        spaces, the reward range, the graph edges and the budget.
    """
    self.dispersionModel = InvasiveUtility.Levin
    # Undirected view of the river network; path-based dispersion below
    # needs a symmetric adjacency structure.
    notDirectedG = networkx.Graph(self.simulationParameterObj.graph)
    adjMatrix = adjacency_matrix(notDirectedG)
    edges = self.simulationParameterObj.graph.edges()
    simulationParameterObj = self.simulationParameterObj
    if self.dispersionModel == InvasiveUtility.Levin:
        parameters = InvasiveUtility.calculatePath(notDirectedG, adjMatrix, edges,
            simulationParameterObj.downStreamRate, simulationParameterObj.upStreamRate)
        # Normalization constant combining up/down-stream dispersal rates.
        C = (1 - simulationParameterObj.upStreamRate * simulationParameterObj.downStreamRate) / (
            (1 - 2 * simulationParameterObj.upStreamRate) * (1 - simulationParameterObj.downStreamRate))
        # NOTE: "dispertionTable" (sic) is read by env_step; keep the name.
        self.dispertionTable = np.dot(1 / C, parameters)
        self.germinationObj = GerminationDispersionParameterClass(1, 1)
    # calculating the worst case fully invaded rivers cost
    worst_case = repmat(1, 1, self.simulationParameterObj.nbrReaches * self.simulationParameterObj.habitatSize)[0]
    cost_state_unit = InvasiveUtility.get_unit_invaded_reaches(worst_case,
        self.simulationParameterObj.habitatSize) * self.actionParameterObj.costPerReach
    stateCost = cost_state_unit + InvasiveUtility.get_invaded_reaches(
        worst_case) * self.actionParameterObj.costPerTree
    stateCost = stateCost + InvasiveUtility.get_empty_slots(worst_case) * self.actionParameterObj.emptyCost
    # Worst-case action cost: action 3 applied on every reach.
    costAction = InvasiveUtility.get_budget_cost_actions(
        repmat(3, 1, self.simulationParameterObj.nbrReaches)[0], worst_case, self.actionParameterObj)
    # NOTE(review): a stray networkx.adjacency_matrix(...) call whose result
    # was discarded has been removed here (dead code).
    return "VERSION RL-Glue-3.0 PROBLEMTYPE non-episodic DISCOUNTFACTOR " + str(
        self.discountFactor) + " OBSERVATIONS INTS (" + str(
        self.simulationParameterObj.nbrReaches * self.simulationParameterObj.habitatSize) + " 1 3) ACTIONS INTS (" + str(
        self.simulationParameterObj.nbrReaches) + " 1 4) REWARDS (" + str(self.Bad_Action_Penalty) + " " + str(
        -1 * (costAction + stateCost)) + ") EXTRA " + str(
        self.simulationParameterObj.graph.edges()) + " BUDGET " + str(self.actionParameterObj.budget) + " by Majid Taleghan."
def env_step(self, action):
    """Advance the simulation one step for the given per-reach actions.

    Args:
        action: RL-Glue Action whose ``intArray`` holds one management
            action per reach (length must equal ``nbrReaches``).

    Returns:
        Reward_observation_terminal: next observation and reward. If the
        action is disallowed for the current state, or its cost exceeds
        the budget, the observation is ``[-1]`` and the reward is
        ``self.Bad_Action_Penalty``.

    NOTE(review): this definition is shadowed by an identical ``env_step``
    defined immediately after it in the file — one of the two should be
    deleted. The original also contained a Python-2 debug statement
    ``if len(action) != 3: print action, len(action)`` (hard-coded 3) and
    two ``is_action_allowable`` calls whose results were discarded; all
    removed here as dead/debug code.
    """
    action = action.intArray
    assert len(action) == self.simulationParameterObj.nbrReaches, "Expected " + str(
        self.simulationParameterObj.nbrReaches) + " integer action."
    # Disallowed action: terminal-style penalty response, state unchanged.
    if not InvasiveUtility.is_action_allowable(action, self.state):
        theObs = Observation()
        theObs.intArray = [-1]
        returnRO = Reward_observation_terminal()
        returnRO.r = self.Bad_Action_Penalty
        returnRO.o = theObs
        return returnRO
    # State cost = per-invaded-reach unit cost + per-invaded-tree cost
    # + per-empty-slot cost.
    cost_state_unit = InvasiveUtility.get_unit_invaded_reaches(self.state,
        self.simulationParameterObj.habitatSize) * self.actionParameterObj.costPerReach
    stateCost = cost_state_unit + InvasiveUtility.get_invaded_reaches(
        self.state) * self.actionParameterObj.costPerTree
    stateCost = stateCost + InvasiveUtility.get_empty_slots(self.state) * self.actionParameterObj.emptyCost
    costAction = InvasiveUtility.get_budget_cost_actions(action, self.state, self.actionParameterObj)
    # Over-budget actions are penalized the same way as disallowed ones.
    if costAction > self.actionParameterObj.budget:
        theObs = Observation()
        theObs.intArray = [-1]
        returnRO = Reward_observation_terminal()
        returnRO.r = self.Bad_Action_Penalty
        returnRO.o = theObs
        return returnRO
    nextState = simulateNextState(self.state, action, self.simulationParameterObj,
        self.actionParameterObj, self.dispertionTable, self.germinationObj)
    self.state = nextState
    theObs = Observation()
    theObs.intArray = self.state
    returnRO = Reward_observation_terminal()
    returnRO.r = -1 * (costAction + stateCost)
    returnRO.o = theObs
    return returnRO
def env_step(self, action):
    """Advance the simulation one step for the given per-reach actions.

    Args:
        action: RL-Glue Action whose ``intArray`` holds one management
            action per reach (length must equal ``nbrReaches``).

    Returns:
        Reward_observation_terminal: next observation and reward. If the
        action is disallowed for the current state, or its cost exceeds
        the budget, the observation is ``[-1]`` and the reward is
        ``self.Bad_Action_Penalty``.

    NOTE(review): this is the second definition of ``env_step`` in the
    file; it shadows the earlier one. The duplicates should be reduced to
    a single definition. Two ``is_action_allowable`` calls whose results
    were discarded (dead code) have been removed here.
    """
    action = action.intArray
    assert len(action) == self.simulationParameterObj.nbrReaches, "Expected " + str(
        self.simulationParameterObj.nbrReaches) + " integer action."
    # Disallowed action: penalty response, state unchanged.
    if not InvasiveUtility.is_action_allowable(action, self.state):
        theObs = Observation()
        theObs.intArray = [-1]
        returnRO = Reward_observation_terminal()
        returnRO.r = self.Bad_Action_Penalty
        returnRO.o = theObs
        return returnRO
    # State cost = per-invaded-reach unit cost + per-invaded-tree cost
    # + per-empty-slot cost.
    cost_state_unit = InvasiveUtility.get_unit_invaded_reaches(self.state,
        self.simulationParameterObj.habitatSize) * self.actionParameterObj.costPerReach
    stateCost = cost_state_unit + InvasiveUtility.get_invaded_reaches(
        self.state) * self.actionParameterObj.costPerTree
    stateCost = stateCost + InvasiveUtility.get_empty_slots(self.state) * self.actionParameterObj.emptyCost
    costAction = InvasiveUtility.get_budget_cost_actions(action, self.state, self.actionParameterObj)
    # Over-budget actions are penalized the same way as disallowed ones.
    if costAction > self.actionParameterObj.budget:
        theObs = Observation()
        theObs.intArray = [-1]
        returnRO = Reward_observation_terminal()
        returnRO.r = self.Bad_Action_Penalty
        returnRO.o = theObs
        return returnRO
    nextState = simulateNextState(self.state, action, self.simulationParameterObj,
        self.actionParameterObj, self.dispertionTable, self.germinationObj)
    self.state = nextState
    theObs = Observation()
    theObs.intArray = self.state
    returnRO = Reward_observation_terminal()
    returnRO.r = -1 * (costAction + stateCost)
    returnRO.o = theObs
    return returnRO