def _makeLightToggleAction(self, agent):
    """
    Creates an action for the agent to toggle the light switch in a location that
    has one. Toggling a switch in a location affects the light status in all rooms
    that share its light (as listed in ``self.sharedLights``).

    :param Agent agent: the agent to receive the toggle-light action.
    """
    locKey = stateKey(agent.name, 'loc')
    locsWithLights = set(self.sharedLights.keys())

    # Legal only if the agent is in a room with a light switch.
    legalityTree = makeTree({
        'if': equalRow(locKey, locsWithLights),
        True: True,
        False: False
    })
    # BUG FIX: legalityTree is already a KeyedTree; the original wrapped it in a
    # second, redundant makeTree() call before passing it to addAction.
    action = agent.addAction({'verb': 'toggleLight'}, legalityTree)

    # Instead of iterating over all locations, iterate over those that have
    # switches and create a dynamics tree for each affected room's light flag.
    for switch, affected in self.sharedLights.items():
        for aff in affected:
            affFlag = stateKey(WORLD, 'light' + str(aff))
            # If the agent is at the switch's location, invert the light flag;
            # otherwise leave it unchanged.
            txnTree = {
                'if': equalRow(locKey, switch),
                True: {
                    'if': equalRow(affFlag, True),
                    True: setToConstantMatrix(affFlag, False),
                    False: setToConstantMatrix(affFlag, True)
                },
                False: noChangeMatrix(affFlag)
            }
            # NOTE(review): if two different switches affect the same room, the
            # later setDynamics call appears to replace the earlier tree for
            # (affFlag, action) — confirm against psychsim's setDynamics semantics.
            self.world.setDynamics(affFlag, action, makeTree(txnTree))
    self.lightActions[agent.name] = action
def makeRandomFOVDistr(self, agent):
    """
    Builds the dynamics tree for the agent's field-of-view (FOV) feature:
    branches on the agent's location and, for locations with victims, attaches a
    normalized probability distribution over which victim color appears in FOV.

    :param Agent agent: the agent whose FOV dynamics tree is being built.
    :rtype: dict
    :return: a dictionary suitable for makeTree, defining the FOV feature's dynamics.
    """
    fov_key = stateKey(agent.name, FOV_FEATURE)
    # default branch: unknown location leaves FOV unchanged
    tree = {
        'if': equalRow(stateKey(agent.name, 'loc'), self.world_map.all_locations),
        None: noChangeMatrix(fov_key)
    }
    for il, loc in enumerate(self.world_map.all_locations):
        # locations with no victims always show 'none' in FOV
        if loc not in self.victimClrCounts.keys():
            tree[il] = setToConstantMatrix(fov_key, 'none')
            continue

        # sub-tree over victim colors for this location, with its leaf dicts
        sub_tree, leaves = self._make_fov_color_dist(loc, 0)
        for dist in leaves:
            prob_dist = Distribution(dist)
            prob_dist.normalize()
            # rewrite each leaf dict IN PLACE into a stochastic effect:
            # clear the raw color->weight entries, then install the normalized
            # (matrix, probability) pairs, dropping zero-probability outcomes
            dist.clear()
            weights = [(setToConstantMatrix(fov_key, c), p)
                       for c, p in prob_dist.items() if p > 0]
            if len(weights) == 1:
                # degenerate distribution: pad with a zero-probability no-op so
                # the 'distribution' branch still has at least two entries
                weights.append((noChangeMatrix(fov_key), 0))
            dist['distribution'] = weights
        tree[il] = sub_tree
    return tree
def set_phase_dynamics(self):
    """Installs the dynamics that advance the mission-phase feature from the clock.

    Branches on mission time against the phase end-times; each time bucket maps
    to the corresponding phase, with times past the last threshold mapping to
    the final phase.
    """
    num_phases = len(MISSION_PHASE_END_TIMES)
    # one branch per intermediate phase, keyed by threshold index
    phase_tree = {idx: setToConstantMatrix(self.phase, MISSION_PHASES[idx])
                  for idx in range(num_phases)}
    # beyond the last end-time, lock to the final mission phase
    phase_tree[num_phases] = setToConstantMatrix(self.phase, MISSION_PHASES[-1])
    phase_tree['if'] = thresholdRow(self.time, MISSION_PHASE_END_TIMES)
    self.setDynamics(self.phase, True, makeTree(phase_tree))
def set_reward(self, agent, weight, model=None):
    """Sets a location-visitation reward for the agent.

    The reward is 1 for the current location if the agent has visited it at
    least once, 0 otherwise, scaled by the given weight and the normalization
    factor.

    :param Agent agent: the agent receiving the reward function.
    :param float weight: the relative weight of this reward component.
    :param str model: the agent model to attach the reward to (optional).
    """
    rwd_feat = rewardKey(agent.name)

    def _visited_branch(location):
        # 1 if this location's visit counter is above zero, else 0
        visits_feat = get_num_visits_location_key(agent, location)
        return {'if': thresholdRow(visits_feat, 1),
                True: setToConstantMatrix(rwd_feat, 1),
                False: setToConstantMatrix(rwd_feat, 0)}

    # branch on the agent's current location; unknown locations leave reward as-is
    rwd_tree = {idx: _visited_branch(location)
                for idx, location in enumerate(self.all_locations)}
    rwd_tree['if'] = equalRow(self.location_feat, self.all_locations)
    rwd_tree[None] = noChangeMatrix(rwd_feat)

    agent.setReward(makeTree(rwd_tree), weight * self.normalize_factor, model)
def makeMoveResetFOV(self, agent):
    """Attaches dynamics that clear the agent's field of view on every move.

    Each of the agent's four move actions (N/E/S/W) resets the 'vicInFOV'
    feature to 'none'.

    :param Agent agent: the agent whose move actions reset the FOV.
    """
    fovKey = stateKey(agent.name, 'vicInFOV')
    playerMoves = self.moveActions[agent.name]
    for d in range(4):
        # moving in any direction wipes whatever victim was in view
        self.world.setDynamics(fovKey, playerMoves[d],
                               makeTree(setToConstantMatrix(fovKey, 'none')))
def make_single_player_world(player_name, init_loc, loc_neighbors, victims_color_locs,
                             use_unobserved=True, full_obs=False, light_neighbors=None,
                             create_observer=True, logger=logging):
    """
    Builds a single-player search-and-rescue world with its map, victims, triage
    agent, and (optionally) an observer agent.

    :param str player_name: name of the (single) triage agent.
    :param str init_loc: the agent's initial location.
    :param dict loc_neighbors: adjacency structure of the map's locations.
    :param dict victims_color_locs: victim colors per location.
    :param bool use_unobserved: whether to create unobserved victim variables/priors.
    :param bool full_obs: whether the world is fully observable.
    :param dict light_neighbors: locations sharing lights (default: empty mapping).
    :param bool create_observer: whether to create an observer agent.
    :param logger: logger object/module used for debug output.
    :return: tuple (world, triage_agent, observer, victims, world_map).
    """
    # BUG FIX: avoid a mutable default argument ({}); use None sentinel instead
    if light_neighbors is None:
        light_neighbors = {}

    # create world and map
    world = SearchAndRescueWorld()
    world_map = WorldMap(world, loc_neighbors, light_neighbors)

    # create victims info
    victims = Victims(world, victims_color_locs, world_map, full_obs=full_obs,
                      color_prior_p=COLOR_PRIOR_P, color_fov_p=COLOR_FOV_P,
                      color_reqd_times=COLOR_REQD_TIMES)

    # create (single) triage agent
    triage_agent = world.addAgent(player_name)

    world_map.makePlayerLocation(triage_agent, init_loc)
    victims.setupTriager(triage_agent)
    world_map.makeMoveResetFOV(triage_agent)
    victims.createTriageActions(triage_agent)
    if not full_obs:
        if use_unobserved:
            logger.debug('Start to make observable variables and priors')
            victims.createObsVars4Victims(triage_agent)
        logger.debug('Made observable variables and priors')
    victims.makeSearchAction(triage_agent)
    logger.debug('Made actions for triage agent: {}'.format(triage_agent.name))
    triage_agent.setReward(
        makeTree(setToConstantMatrix(rewardKey(triage_agent.name), 0)))  # dummy reward

    # after all agents are created
    victims.makeExpiryDynamics()
    victims.stochasticTriageDur()

    world.setOrder([{triage_agent.name}])

    # observer agent
    observer = make_observer(world, [triage_agent.name], OBSERVER_NAME) if create_observer else None

    # adjust agent's beliefs and observations: the agent cannot observe the
    # observer's model, its own reward, or any 'unobs*' variables
    triage_agent.resetBelief()
    triage_agent.omega = [
        key for key in world.state.keys()
        if not ((key in {
            modelKey(observer.name if observer is not None else ''),
            rewardKey(triage_agent.name)
        }) or (key.find('unobs') > -1))
    ]

    return world, triage_agent, observer, victims, world_map
def get_reward_tree(agent, my_side, other_side):
    """Builds the agent's payoff tree for the side-choosing game.

    Reward is INVALID while either agent is undecided; otherwise the agent gets
    SAME_SIDE_RWD when both picked the same side and DIFF_SIDES_RWD otherwise.

    :param Agent agent: the agent receiving the reward.
    :param str my_side: this agent's 'side' feature key.
    :param str other_side: the other agent's 'side' feature key.
    :return: the reward tree (KeyedTree).
    """
    reward_key = rewardKey(agent.name)

    # innermost comparison: do the two chosen sides match?
    sides_match = {
        'if': equalFeatureRow(my_side, other_side),
        True: setToConstantMatrix(reward_key, SAME_SIDE_RWD),
        False: setToConstantMatrix(reward_key, DIFF_SIDES_RWD)
    }
    # has the other agent produced a valid decision?
    other_valid = {
        'if': equalRow(other_side, INVALID),
        True: setToConstantMatrix(reward_key, INVALID),
        False: sides_match
    }
    # have I decided at all?
    return makeTree({
        'if': equalRow(my_side, NOT_DECIDED),
        True: setToConstantMatrix(reward_key, INVALID),
        False: other_valid
    })
def makeExpiryDynamics(self): vic_colors = [ color for color in self.color_names if color not in {WHITE_STR, RED_STR} ] # update victim loc counters for loc in self.world_map.all_locations: red_ctr = stateKey(WORLD, 'ctr_' + loc + '_' + RED_STR) for color in vic_colors: ctr = stateKey(WORLD, 'ctr_' + loc + '_' + color) expire = self.color_expiry[color] # RED: if death time is reached, copy amount of alive victims to counter deathTree = { 'if': thresholdRow(self.world.time, expire), True: addFeatureMatrix(red_ctr, ctr), False: noChangeMatrix(red_ctr) } self.world.setDynamics(red_ctr, True, makeTree(deathTree)) # GREEN and GOLD: if death time reached, zero-out alive victims of that color deathTree = { 'if': thresholdRow(self.world.time, expire), True: setToConstantMatrix(ctr, 0), False: noChangeMatrix(ctr) } self.world.setDynamics(ctr, True, makeTree(deathTree))
def set_constant_reward(agent, value):
    """
    Gets a matrix that sets the reward of the given agent to a constant value.

    :param Agent agent: the agent whose reward we want to set.
    :param float value: the value we want to set the reward to.
    :rtype: KeyedMatrix
    :return: a matrix that allows setting the agent's reward to the given constant value.
    """
    # fix: removed the duplicated phrase "the reward the reward" from the docstring
    return setToConstantMatrix(rewardKey(agent.name), value)
def _sense1Location(self, beepKey, nbrLoc):
    """Builds the sensor-beep sub-tree for a single neighboring location.

    Two beeps ('2') signal a gold victim, one beep ('1') a green victim,
    'none' otherwise; with PROB_NO_BEEP > 0 the beep may be missed.

    :param str beepKey: the sensor feature to set.
    :param str nbrLoc: the neighboring location being sensed.
    :rtype: dict
    :return: a dictionary branch suitable for embedding in a makeTree tree.
    """
    goldCtr = stateKey(WORLD, 'ctr_' + nbrLoc + '_' + GOLD_STR)
    greenCtr = stateKey(WORLD, 'ctr_' + nbrLoc + '_' + GREEN_STR)

    if PROB_NO_BEEP == 0:
        # deterministic sensor: gold takes precedence over green
        return {
            'if': thresholdRow(goldCtr, 0),
            True: setToConstantMatrix(beepKey, '2'),
            False: {
                'if': thresholdRow(greenCtr, 0),
                True: setToConstantMatrix(beepKey, '1'),
                False: setToConstantMatrix(beepKey, 'none')
            }
        }

    def noisyBeep(signal):
        # beep with probability 1 - PROB_NO_BEEP, otherwise silence
        return {'distribution': [
            (setToConstantMatrix(beepKey, signal), 1 - PROB_NO_BEEP),
            (setToConstantMatrix(beepKey, 'none'), PROB_NO_BEEP)
        ]}

    return {
        'if': thresholdRow(goldCtr, 0),
        True: noisyBeep('2'),
        False: {
            'if': thresholdRow(greenCtr, 0),
            True: noisyBeep('1'),
            False: setToConstantMatrix(beepKey, 'none')
        }
    }
def get_reward_tree(agent, my_dec, other_dec):
    """Builds the agent's prisoner's-dilemma payoff tree.

    Reward is INVALID while either agent is undecided. Otherwise the classic
    payoff matrix applies: MUTUAL_COOP (both cooperate), SUCKER (I cooperate,
    other defects), TEMPTATION (I defect, other cooperates), PUNISHMENT (both
    defect).

    :param Agent agent: the agent receiving the reward.
    :param str my_dec: this agent's decision feature key.
    :param str other_dec: the other agent's decision feature key.
    :return: the reward tree (KeyedTree).
    """
    reward_key = rewardKey(agent.name)

    def payoff(value):
        return setToConstantMatrix(reward_key, value)

    # payoff matrix once both agents have decided
    decided_payoffs = {
        'if': equalRow(my_dec, COOPERATED),  # did I cooperate?
        True: {
            'if': equalRow(other_dec, COOPERATED),
            True: payoff(MUTUAL_COOP),  # both cooperated
            False: payoff(SUCKER)       # I cooperated, other defected
        },
        False: {
            'if': equalRow(other_dec, COOPERATED),
            True: payoff(TEMPTATION),   # I defected, other cooperated
            False: payoff(PUNISHMENT)   # both defected
        }
    }
    return makeTree({
        'if': equalRow(my_dec, NOT_DECIDED),  # if I have not decided
        True: payoff(INVALID),
        False: {
            'if': equalRow(other_dec, NOT_DECIDED),  # if other has not decided
            True: payoff(INVALID),
            False: decided_payoffs
        }
    })
def _createSensorDyn(self, human):
    """Installs beep-sensor dynamics for each direction around the human.

    For every direction, the sensor feature branches on the human's (future)
    location: locations with a neighbor in that direction get the per-location
    sensing sub-tree; all others read 'none'.

    :param Agent human: the agent whose directional sensors are being wired up.
    """
    futureLoc = makeFuture(stateKey(human.name, 'loc'))
    for d in Directions:
        sensorKey = stateKey(human.name, 'sensor_' + d.name)
        neighborMap = self.world_map.neighbors[d.value]
        sensedLocs = list(neighborMap.keys())

        # default branch: no neighbor in this direction -> silence
        sensorTree = {'if': equalRow(futureLoc, sensedLocs),
                      None: setToConstantMatrix(sensorKey, 'none')}
        for branch, loc in enumerate(sensedLocs):
            # sense the neighboring room in direction d from this location
            sensorTree[branch] = self._sense1Location(sensorKey, neighborMap[loc])
        self.world.setDynamics(sensorKey, True, makeTree(sensorTree))
def _makeMoveActions(self, agent):
    """
    N/E/S/W actions
    Legality: if current location has a neighbor in the given direction
    Dynamics: 1) change human's location; 2) set the seen flag for new location to True
    3) Set the observable victim variables to the first victim at the new location, if any
    4) Reset the crosshair/approached vars to none
    """
    self.moveActions[agent.name] = []
    locKey = stateKey(agent.name, 'loc')

    for direction in Directions:
        # Legal if current location has a neighbor in the given direction
        locsWithNbrs = set(self.neighbors[direction.value].keys())
        legalityTree = makeTree({
            'if': equalRow(locKey, locsWithNbrs),
            True: True,
            False: False
        })
        action = agent.addAction({
            'verb': 'move',
            'object': direction.name
        }, legalityTree)
        self.moveActions[agent.name].append(action)

        # Dynamics of this move action: change the agent's location to 'this' location
        # NOTE(review): lstlocsWithNbrs is built from a set, so branch order is
        # arbitrary-but-fixed; the enumerate indices below stay consistent with
        # the equalRow built from the same list.
        lstlocsWithNbrs = list(locsWithNbrs)
        tree = {'if': equalRow(locKey, lstlocsWithNbrs)}
        for il, loc in enumerate(lstlocsWithNbrs):
            # moving from 'loc' lands the agent in its neighbor in this direction
            tree[il] = setToConstantMatrix(
                locKey, self.neighbors[direction.value][loc])
        self.world.setDynamics(locKey, action, makeTree(tree))

        # move increments the counter of the location we moved to
        for dest in self.all_locations:
            destKey = stateKey(agent.name, 'locvisits_' + str(dest))
            tree = makeTree({
                'if': equalRow(makeFuture(locKey), dest),
                True: incrementMatrix(destKey, 1),
                False: noChangeMatrix(destKey)
            })
            self.world.setDynamics(destKey, action, tree)

        # increment time
        self.world.setDynamics(
            self.world.time, action,
            makeTree(incrementMatrix(self.world.time, MOVE_TIME_INC)))
def makeSearchAction(self, agent):
    """Creates the agent's 'search' action and its FOV/time dynamics.

    By default the FOV feature resets to 'none'; searching instead draws a
    victim (or nothing) into FOV according to the random FOV distribution, and
    advances mission time by the search increment.

    :param Agent agent: the agent to receive the search action.
    """
    searchAction = agent.addAction({'verb': 'search'})
    fovKey = stateKey(agent.name, FOV_FEATURE)

    # default: FOV is none
    defaultTree = makeTree(setToConstantMatrix(fovKey, 'none'))
    self.world.setDynamics(fovKey, True, defaultTree)

    # A victim can randomly appear in FOV when searching
    self.world.setDynamics(fovKey, searchAction,
                           makeTree(self.makeRandomFOVDistr(agent)))

    # increment time
    timeTree = makeTree(incrementMatrix(self.world.time, SEARCH_TIME_INC))
    self.world.setDynamics(self.world.time, searchAction, timeTree)

    self.searchActs[agent.name] = searchAction
def tree_from_univariate_samples(set_var, x_var, x_params, sample_values, idx_min=0, idx_max=-1):
    """
    Creates a PWL dynamics tree that sets the value of one feature according to the
    value of another, as given by a set of samples. Recursively builds a binary
    search tree over the sampled parameter values to find the "best match" for the
    parameter feature's value.

    :param str set_var: the feature (named key) on which to store the approximation.
    :param str x_var: the feature (named key) providing the parameter value from
        which to calculate the approximation.
    :param np.ndarray x_params: shape (num_samples, ) array of parameter values.
    :param np.ndarray sample_values: shape (num_samples, ) array with the function
        values for each parameter value.
    :param int idx_min: lower index of the current binary search.
    :param int idx_max: upper index of the current binary search; -1 means
        num_samples - 1.
    :rtype: dict
    :return: a dictionary to be used with makeTree, approximating the function
        that produced the given samples.
    """
    # resolve the -1 sentinel to the last sample index
    if idx_max == -1:
        idx_max = len(x_params) - 1

    # leaf: the search interval collapsed to a single sample
    if idx_min == idx_max:
        return setToConstantMatrix(set_var, sample_values[idx_max])

    # split the interval at its midpoint and recurse on each half
    mid = (idx_min + idx_max) // 2
    pivot = x_params[mid]
    upper_half = tree_from_univariate_samples(
        set_var, x_var, x_params, sample_values, mid + 1, idx_max)
    lower_half = tree_from_univariate_samples(
        set_var, x_var, x_params, sample_values, idx_min, mid)
    return {'if': multi_compare_row({x_var: 1}, pivot),  # if var is greater than pivot
            True: upper_half,    # search right
            False: lower_half}   # search left
# Configure both game agents, their 'side' state, actions, models, and payoffs.
# (agent1/agent2, world, sides, lefts, rights and the constants are defined earlier.)
agents = [agent1, agent2]
for agent in agents:
    # set agent's params
    agent.setAttribute('discount', 1)
    agent.setHorizon(1)
    agent.setAttribute('selection', TIEBREAK)

    # add 'side chosen' variable (0 = didn't decide, 1 = went left, 2 = went right)
    side = world.defineState(agent.name, 'side', list, [NOT_DECIDED, WENT_LEFT, WENT_RIGHT])
    world.setFeature(side, NOT_DECIDED)
    sides.append(side)

    # define agents' actions (left and right); each action deterministically
    # writes the corresponding value into the agent's 'side' feature
    action = agent.addAction({'verb': '', 'action': 'go left'})
    tree = makeTree(setToConstantMatrix(side, WENT_LEFT))
    world.setDynamics(side, action, tree)
    lefts.append(action)
    action = agent.addAction({'verb': '', 'action': 'go right'})
    tree = makeTree(setToConstantMatrix(side, WENT_RIGHT))
    world.setDynamics(side, action, tree)
    rights.append(action)

    # create a new model for the agent
    agent.addModel(get_fake_model_name(agent), parent=agent.get_true_model())

# defines payoff matrices (each agent's reward depends on both sides chosen)
agent1.setReward(get_reward_tree(agent1, sides[0], sides[1]), 1)
agent2.setReward(get_reward_tree(agent2, sides[1], sides[0]), 1)
# defect (not legal if other has cooperated before, legal only if agent itself did not defect before) action = agent.addAction({ 'verb': '', 'action': 'defect' }, makeTree({ 'if': equalRow(other_dec, COOPERATED), True: { 'if': equalRow(my_dec, DEFECTED), True: True, False: False }, False: True })) tree = makeTree(setToConstantMatrix(my_dec, DEFECTED)) world.setDynamics(my_dec, action, tree) # cooperate (not legal if other or agent itself defected before) action = agent.addAction({ 'verb': '', 'action': 'cooperate' }, makeTree({ 'if': equalRow(other_dec, DEFECTED), True: False, False: { 'if': equalRow(my_dec, DEFECTED), True: False, False: True }
hi=100) world.setFeature(var_rcv_amnt, 0) # add producer actions # produce capacity: if half capacity then 0.5*asked amount else asked amount) act_prod = ag_producer.addAction({'verb': '', 'action': 'produce'}) tree = makeTree({ 'if': equalRow(var_half_cap, True), True: multi_set_matrix(var_rcv_amnt, {var_ask_amnt: 0.5}), False: setToFeatureMatrix(var_rcv_amnt, var_ask_amnt) }) world.setDynamics(var_rcv_amnt, act_prod, tree) # add consumer actions (ask more = 10 / less = 5) act_ask_more = ag_consumer.addAction({'verb': '', 'action': 'ask_more'}) tree = makeTree(setToConstantMatrix(var_ask_amnt, 10)) world.setDynamics(var_ask_amnt, act_ask_more, tree) act_ask_less = ag_consumer.addAction({'verb': '', 'action': 'ask_less'}) tree = makeTree(setToConstantMatrix(var_ask_amnt, 5)) world.setDynamics(var_ask_amnt, act_ask_less, tree) # defines payoff for consumer agent: if received amount > 5 then 10 - rcv_amnt (penalty) else rcv_amount (reward) # this simulates over-stock cost, best is to receive max of 5, more than this has costs ag_consumer.setReward( makeTree({ 'if': thresholdRow(var_rcv_amnt, 5), True: multi_reward_matrix(ag_consumer, { CONSTANT: 10,
for agent in agents: # set agent's params agent.setAttribute('discount', 1) agent.setAttribute('selection', TIEBREAK) agent.setHorizon(1) # agent.setRecursiveLevel(1) # add "decision" variable (0 = didn't decide, 1 = went straight, 2 = swerved) dec = world.defineState(agent.name, 'decision', list, [NOT_DECIDED, WENT_STRAIGHT, SWERVED]) world.setFeature(dec, NOT_DECIDED) agents_dec.append(dec) # define agents' actions (defect and cooperate) action = agent.addAction({'verb': '', 'action': 'go straight'}) tree = makeTree(setToConstantMatrix(dec, WENT_STRAIGHT)) world.setDynamics(dec, action, tree) action = agent.addAction({'verb': '', 'action': 'swerve'}) tree = makeTree(setToConstantMatrix(dec, SWERVED)) world.setDynamics(dec, action, tree) # defines payoff matrices agent1.setReward(get_reward_tree(agent1, agents_dec[0], agents_dec[1]), 1) agent2.setReward(get_reward_tree(agent2, agents_dec[1], agents_dec[0]), 1) # define order my_turn_order = [{agent1.name, agent2.name}] world.setOrder(my_turn_order) # add true mental model of the other to each agent world.setMentalModel(agent1.name, agent2.name,
def _createTriageAction(self, agent, color):
    """
    Creates the 'triage_<color>' action for the given agent and wires up all of
    its dynamics: per-location victim counters, FOV, per-color saved counters,
    and mission time.

    :param Agent agent: the agent to receive the triage action.
    :param str color: the victim color this action triages.
    """
    fov_key = stateKey(agent.name, FOV_FEATURE)
    loc_key = stateKey(agent.name, 'loc')

    # legal only when a victim of this color is in the agent's FOV
    legal = {'if': equalRow(fov_key, color), True: True, False: False}
    action = agent.addAction({'verb': 'triage_' + color}, makeTree(legal))

    # triage duration depends on the victim's color
    if color == GREEN_STR:
        threshold = 7
    else:
        threshold = 14
    # success condition: enough time will have elapsed after this step
    longEnough = differenceRow(makeFuture(self.world.time), self.world.time, threshold)

    for loc in self.world_map.all_locations:
        # successful triage conditions
        conds = [
            equalRow(fov_key, color),
            equalRow(loc_key, loc), longEnough
        ]

        # location-specific counter of vics of this color: if successful, decrement
        vicsInLocOfClrKey = stateKey(WORLD, 'ctr_' + loc + '_' + color)
        tree = makeTree(
            anding(conds, incrementMatrix(vicsInLocOfClrKey, -1),
                   noChangeMatrix(vicsInLocOfClrKey)))
        self.world.setDynamics(vicsInLocOfClrKey, action, tree)

        # white: increment
        vicsInLocOfClrKey = stateKey(WORLD, 'ctr_' + loc + '_' + WHITE_STR)
        tree = makeTree(
            anding(conds, incrementMatrix(vicsInLocOfClrKey, 1),
                   noChangeMatrix(vicsInLocOfClrKey)))
        self.world.setDynamics(vicsInLocOfClrKey, action, tree)

    # Fov update to white
    tree = {
        'if': longEnough,
        True: setToConstantMatrix(fov_key, WHITE_STR),
        False: noChangeMatrix(fov_key)
    }
    self.world.setDynamics(fov_key, action, makeTree(tree))

    # Color saved counter: increment
    saved_key = stateKey(agent.name, 'numsaved_' + color)
    tree = {
        'if': longEnough,
        True: incrementMatrix(saved_key, 1),
        False: noChangeMatrix(saved_key)
    }
    self.world.setDynamics(saved_key, action, makeTree(tree))

    # Color saved: according to difference
    # NOTE: 'diff' is built from the 'numsaved_' key BEFORE saved_key is rebound
    # to the 'saved_' flag below — the flag tracks the delta of the counter.
    diff = {makeFuture(saved_key): 1, saved_key: -1}
    saved_key = stateKey(agent.name, 'saved_' + color)
    self.world.setDynamics(saved_key, action,
                           makeTree(dynamicsMatrix(saved_key, diff)))
    self.world.setDynamics(
        saved_key, True,
        makeTree(setFalseMatrix(saved_key)))  # default: set to False

    # increment time
    self.world.setDynamics(
        self.world.time, action,
        makeTree(incrementMatrix(self.world.time, threshold)))

    self.triageActs[agent.name][color] = action
def tree_from_bivariate_samples(set_var, x_var, y_var, x_params, y_params, sample_values, idx_x_min=0, idx_x_max=-1, idx_y_min=0, idx_y_max=-1): """ Creates a PWL dynamics tree that sets the value of one feature according to the value of two other as provided by a given set of samples. This a recursive function that creates two intertwined binary search trees to determine the "best match" for the value of the parameter feature pair. :param str set_var: the feature (named key) on which to store the approximation. :param str x_var: the feature (named key) providing the x-parameter value from which to calculate the approximation. :param str y_var: the feature (named key) providing the y-parameter value from which to calculate the approximation. :param np.ndarray x_params: an array of shape (num_x_samples, ) containing the values for the x parameters. :param np.ndarray y_params: an array of shape (num_y_samples, ) containing the values for the y parameters. :param np.ndarray sample_values: an array of shape (num_x_samples, num_y_samples) with the function values for each parameter pair. :param int idx_x_min: the lower x-index of the current binary search. :param int idx_x_max: the upper x-index of the current binary search. -1 corresponds to num_x_samples - 1. :param int idx_y_min: the lower y-index of the current binary search. :param int idx_y_max: the upper y-index of the current binary search. -1 corresponds to num_y_samples - 1. :rtype: dict :return: a dictionary to be used with makeTree to define the dynamics of the approximation of the function that produced the given samples. 
""" # checks indexes if idx_x_max == -1: idx_x_max = len(x_params) - 1 if idx_y_max == -1: idx_y_max = len(y_params) - 1 # checks termination (leaf), sets to index's value if idx_x_min == idx_x_max and idx_y_min == idx_y_max: return setToConstantMatrix(set_var, sample_values[idx_x_max, idx_y_max]) # tests for hyperplane in x_axis, performs binary search in y-axis if idx_x_min == idx_x_max: idx_y = (idx_y_max + idx_y_min) // 2 y = y_params[idx_y] return { 'if': multi_compare_row({y_var: -1}, -y), # if y var is less than y True: tree_from_bivariate_samples( # search left set_var, x_var, y_var, x_params, y_params, sample_values, idx_x_min, idx_x_max, idx_y_min, idx_y), False: tree_from_bivariate_samples( # search right set_var, x_var, y_var, x_params, y_params, sample_values, idx_x_min, idx_x_max, idx_y + 1, idx_y_max) } # otherwise performs binary search in x-axis idx_x = (idx_x_max + idx_x_min) // 2 x = x_params[idx_x] return { 'if': multi_compare_row({x_var: -1}, -x), # if x var is less than x True: tree_from_bivariate_samples( # search left set_var, x_var, y_var, x_params, y_params, sample_values, idx_x_min, idx_x, idx_y_min, idx_y_max), False: tree_from_bivariate_samples( # search right set_var, x_var, y_var, x_params, y_params, sample_values, idx_x + 1, idx_x_max, idx_y_min, idx_y_max) }