def _makeLightToggleAction(self, agent):
    """
    Create the agent's action for toggling a light switch.

    The action is legal only while the agent's location is one of the keys
    of ``self.sharedLights`` (the locations that have a switch). Toggling the
    switch in a location flips the light flag of every room listed for that
    switch. The created action is stored in ``self.lightActions``.

    :param agent: the agent that receives the ``toggleLight`` action.
    """
    locKey = stateKey(agent.name, 'loc')
    locsWithLights = set(self.sharedLights.keys())

    # Legal if I'm in a room with a light switch
    legalityTree = makeTree({
        'if': equalRow(locKey, locsWithLights),
        True: True,
        False: False
    })
    # the tree is already built: hand it over as-is (as _makeMoveActions does)
    # instead of wrapping it in a second makeTree() call
    action = agent.addAction({'verb': 'toggleLight'}, legalityTree)

    # Instead of iterating over locations, iterate over those that have
    # switches and create a tree for each affected room
    # NOTE(review): if a room is listed under more than one switch, each
    # setDynamics call for the same (flag, action) pair presumably replaces
    # the previous one -- confirm
    for switch, affected in self.sharedLights.items():
        for aff in affected:
            affFlag = stateKey(WORLD, 'light' + str(aff))
            txnTree = {
                'if': equalRow(locKey, switch),
                # agent is at this switch: flip the flag
                True: {
                    'if': equalRow(affFlag, True),
                    True: setToConstantMatrix(affFlag, False),
                    False: setToConstantMatrix(affFlag, True)
                },
                # agent is elsewhere: leave the flag untouched
                False: noChangeMatrix(affFlag)
            }
            self.world.setDynamics(affFlag, action, makeTree(txnTree))

    self.lightActions[agent.name] = action
def set_action_legality(agent, action, legality=True, models=None):
    """
    Sets legality for an action for the given agent and model.

    :param Agent agent: the agent whose model(s) we want to set the action legality.
    :param ActionSet action: the action for which to set the legality.
    :param bool legality: whether to set this action legal (True) or illegal (False)
    :param list[str] models: the list of models for which to set the action legality.
        None (or an empty list) sets it unconditionally on the agent itself.
    """
    if models:
        model_key = modelKey(agent.name)
        # chain of tests on the model feature, one per listed model; when none
        # of them matches, the innermost fallback is the opposite legality
        legal_spec = not legality
        for model_name in models:
            legal_spec = {
                'if': equalRow(model_key, agent.model2index(model_name)),
                True: legality,
                False: legal_spec
            }
    else:
        # "true" model: constant legality
        legal_spec = legality

    agent.setLegal(action, makeTree(legal_spec))
def makeExpiryDynamics(self):
    """
    Set the default (action-independent) dynamics of the per-location victim
    counters so that victims expire over time.

    For every location and every color other than white and red, once the
    world time passes ``self.color_expiry[color]`` the red counter of the
    location gets that color's counter added to it, and the color's own
    counter is set to zero.
    """
    expiring_colors = [
        clr for clr in self.color_names
        if clr != WHITE_STR and clr != RED_STR
    ]
    clock = self.world.time

    for loc in self.world_map.all_locations:
        ctr_prefix = 'ctr_' + loc + '_'
        red_ctr = stateKey(WORLD, ctr_prefix + RED_STR)
        for clr in expiring_colors:
            ctr = stateKey(WORLD, ctr_prefix + clr)
            deadline = self.color_expiry[clr]

            # RED: when the death time is reached, add the number of alive
            # victims of this color to the red counter
            # NOTE(review): this is set once per color under the same
            # (red_ctr, True) pair; if setDynamics replaces earlier entries,
            # only the last color feeds the red counter -- confirm
            red_tree = makeTree({
                'if': thresholdRow(clock, deadline),
                True: addFeatureMatrix(red_ctr, ctr),
                False: noChangeMatrix(red_ctr)
            })
            self.world.setDynamics(red_ctr, True, red_tree)

            # other colors: when the death time is reached, zero-out the
            # alive victims of that color
            clr_tree = makeTree({
                'if': thresholdRow(clock, deadline),
                True: setToConstantMatrix(ctr, 0),
                False: noChangeMatrix(ctr)
            })
            self.world.setDynamics(ctr, True, clr_tree)
def _get_decision_tree(
        self, expr: Dict,
        leaf_func: Callable[[Dict], Dict or KeyedMatrix]) -> KeyedTree:
    """
    Recursively convert an expression dictionary into a PsychSim decision tree.

    The expression is handled according to its shape:

    - a single logical/relational/action operator: resolved to its operand
      when that operand is constant, otherwise turned into an if-tree whose
      leaves are the constants True/False;
    - an ``'if'`` entry with other children: a branch; a one-entry dict
      condition is first resolved through ``_get_if_tree``, otherwise the
      condition has to be a ``KeyedPlane``;
    - a single ``'switch'`` entry: expanded through ``_get_switch_tree``;
    - a single ``'distribution'`` entry: a stochastic tree over converted leaves;
    - anything else: a leaf produced by ``leaf_func``.

    :param expr: the expression to convert.
    :param leaf_func: function turning a leaf expression into the leaf value.
    :return: the resulting tree.
    """
    logical_ops = {
        'linear_and', 'logic_and', 'linear_or', 'logic_or', 'not', 'equiv',
        'imply', 'eq', 'neq', 'gt', 'lt', 'geq', 'leq', 'action'
    }

    # check if root operation is a logical expression
    op = next(iter(expr.keys()))
    if len(expr) == 1 and op in logical_ops:
        operand = expr[op]
        if _get_const_val(operand) is not None:
            # no need for a tree if it's a constant value
            return self._get_decision_tree(operand, leaf_func)
        # otherwise build a tree that yields the truth value of the expression
        truth_expr = self._get_if_tree(expr, {CONSTANT: True}, {CONSTANT: False})
        return self._get_decision_tree(truth_expr, leaf_func)

    if 'if' in expr and len(expr) > 1:
        condition = expr['if']
        # no plane (branch) provided: the condition's truth value has to be resolved
        if isinstance(condition, dict) and len(condition) == 1:
            resolved = self._get_if_tree(condition, expr[True], expr[False])
            return self._get_decision_tree(resolved, leaf_func)

        assert isinstance(
            condition, KeyedPlane
        ), f'Could not parse RDDL expression, got invalid branch: "{expr}"!'

        # otherwise just create a PsychSim decision tree, converting each child
        tree = {}
        for child in expr:
            if child != 'if':
                tree[child] = self._get_decision_tree(expr[child], leaf_func)
        tree['if'] = condition
        return makeTree(tree)

    if 'switch' in expr and len(expr) == 1:
        switch_expr = self._get_switch_tree(*expr['switch'])
        return self._get_decision_tree(switch_expr, leaf_func)

    if 'distribution' in expr and len(expr) == 1:
        # create stochastic tree
        outcomes = [(leaf_func(v), p) for v, p in expr['distribution']]
        return makeTree({'distribution': outcomes})

    # just return the expression's value
    return makeTree(leaf_func(expr))
def get_reward_tree(agent, my_dec, other_dec):
    """
    Build the reward tree of an agent from both agents' decision features.

    The reward is INVALID while either agent has not decided; otherwise it is
    MUTUAL_COOP / SUCKER when the agent cooperated (other cooperated / did
    not) and TEMPTATION / PUNISHMENT when it did not (other cooperated / did
    not).

    :param agent: the agent whose reward feature is set.
    :param my_dec: the agent's own decision feature.
    :param other_dec: the other agent's decision feature.
    :return: the reward tree.
    """
    reward_key = rewardKey(agent.name)

    def payoff(value):
        # leaf that sets the agent's reward to a constant
        return setToConstantMatrix(reward_key, value)

    def by_other_choice(if_cooperated, otherwise):
        # branch on whether the other agent cooperated
        return {
            'if': equalRow(other_dec, COOPERATED),
            True: payoff(if_cooperated),
            False: payoff(otherwise)
        }

    both_decided = {
        'if': equalRow(my_dec, COOPERATED),  # if I cooperated
        True: by_other_choice(MUTUAL_COOP, SUCKER),
        False: by_other_choice(TEMPTATION, PUNISHMENT)  # I defected
    }
    i_decided = {
        'if': equalRow(other_dec, NOT_DECIDED),  # if other has not decided
        True: payoff(INVALID),
        False: both_decided
    }
    return makeTree({
        'if': equalRow(my_dec, NOT_DECIDED),  # if I have not decided
        True: payoff(INVALID),
        False: i_decided
    })
def make_single_player_world(player_name,
                             init_loc,
                             loc_neighbors,
                             victims_color_locs,
                             use_unobserved=True,
                             full_obs=False,
                             light_neighbors=None,
                             create_observer=True,
                             logger=logging):
    """
    Create a search-and-rescue world with a single triage agent and,
    optionally, an observer agent.

    :param player_name: name of the triage agent.
    :param init_loc: initial location given to the agent.
    :param loc_neighbors: location neighborhood passed to ``WorldMap``.
    :param victims_color_locs: victim information passed to ``Victims``.
    :param use_unobserved: when not in full observability, whether to create
        the observation variables for victims.
    :param full_obs: whether victims are set up with full observability.
    :param light_neighbors: light neighborhood passed to ``WorldMap``;
        defaults to an empty dict.
    :param create_observer: whether to create the observer agent.
    :param logger: object providing ``debug()``; defaults to the ``logging`` module.
    :return: tuple ``(world, triage_agent, observer, victims, world_map)``,
        where ``observer`` is None if no observer was created.
    """
    # use a fresh dict per call: a `{}` default is created once and would be
    # shared (and possibly mutated) across calls
    if light_neighbors is None:
        light_neighbors = {}

    # create world and map
    world = SearchAndRescueWorld()
    world_map = WorldMap(world, loc_neighbors, light_neighbors)

    # create victims info
    victims = Victims(world,
                      victims_color_locs,
                      world_map,
                      full_obs=full_obs,
                      color_prior_p=COLOR_PRIOR_P,
                      color_fov_p=COLOR_FOV_P,
                      color_reqd_times=COLOR_REQD_TIMES)

    # create (single) triage agent
    triage_agent = world.addAgent(player_name)

    world_map.makePlayerLocation(triage_agent, init_loc)
    victims.setupTriager(triage_agent)
    world_map.makeMoveResetFOV(triage_agent)
    victims.createTriageActions(triage_agent)

    # NOTE(review): the nesting of this section was reconstructed from a
    # flattened source -- confirm against the original layout
    if not full_obs:
        if use_unobserved:
            logger.debug('Start to make observable variables and priors')
            victims.createObsVars4Victims(triage_agent)
        logger.debug('Made observable variables and priors')
    victims.makeSearchAction(triage_agent)
    logger.debug('Made actions for triage agent: {}'.format(triage_agent.name))
    triage_agent.setReward(
        makeTree(setToConstantMatrix(rewardKey(triage_agent.name), 0)))  # dummy reward

    # after all agents are created
    victims.makeExpiryDynamics()
    victims.stochasticTriageDur()

    world.setOrder([{triage_agent.name}])

    # observer agent
    observer = make_observer(world, [triage_agent.name], OBSERVER_NAME) if create_observer else None

    # adjust agent's beliefs and observations: the agent observes every state
    # key except the observer's model key, its own reward key and any key
    # containing 'unobs'
    triage_agent.resetBelief()
    hidden_keys = {
        modelKey(observer.name if observer is not None else ''),
        rewardKey(triage_agent.name)
    }
    triage_agent.omega = [
        key for key in world.state.keys()
        if key not in hidden_keys and 'unobs' not in key
    ]

    return world, triage_agent, observer, victims, world_map
def run_univariate_function(name, symbol_fmt, func):
    """
    Approximate a non-linear function with a PsychSim tree and print how the
    approximation compares against the real function on random inputs.

    Despite its name, the function is sampled and evaluated over two
    variables: samples come from ``get_bivariate_samples`` and ``func`` is
    called as ``func(x, y)``. After printing the RMSE the function waits for
    the user to press Enter.

    :param name: name of the function, used in the output and as action name.
    :param symbol_fmt: format string receiving ``x`` and ``y`` for display.
    :param func: the function to approximate, called with ``(x, y)``.
    """
    print('\n*************************************')
    print('Testing {} function'.format(name))

    # PsychSim elements
    world = World()
    agent = Agent('The Agent')
    world.addAgent(agent)

    # gets samples from real non-linear function
    x_params, y_params, sample_values = \
        get_bivariate_samples(func, MIN_X, MAX_X, MIN_Y, MAX_Y, NUM_SAMPLES, NUM_SAMPLES)

    # create features: two holding the variables, one holding the result (dependent)
    var_x = world.defineState(agent.name, 'var_x', float, lo=MIN_X, hi=MAX_X)
    var_y = world.defineState(agent.name, 'var_y', float, lo=MIN_Y, hi=MAX_Y)
    result = world.defineState(agent.name, 'result', float,
                               lo=np.min(sample_values), hi=np.max(sample_values))
    world.setFeature(result, 0)

    # create action that approximates the function, storing the result in the result feature
    action = agent.addAction({'verb': 'operation', 'action': name})
    tree = makeTree(
        tree_from_bivariate_samples(result, var_x, var_y, x_params, y_params, sample_values))
    world.setDynamics(result, action, tree)

    world.setOrder([agent.name])

    # fixed seed so that the test inputs are reproducible
    np.random.seed(SEED)
    values_original = []
    values_approx = []
    for i in range(NUM_TEST_SAMPLES):
        # gets random sample parameters
        x = MIN_X + np.random.rand() * (MAX_X - MIN_X)
        y = MIN_Y + np.random.rand() * (MAX_Y - MIN_Y)

        # sets variables and updates result
        world.setFeature(var_x, x)
        world.setFeature(var_y, y)
        world.step()

        real = func(x, y)
        psych = world.getValue(result)

        print('{:3}: {:30} | Expected: {:10.2f} | PsychSim: {:10.2f}'.format(
            i, symbol_fmt.format(x, y), real, psych))
        values_original.append(real)
        values_approx.append(psych)

    # gets error stats
    rmse = np.sqrt(np.mean((np.array(values_approx) - values_original)**2))
    print('=====================================')
    print('RMSE = {:.3f}'.format(rmse))

    print('\nPress \'Enter\' to continue...')
    input()
def makeMoveResetFOV(self, agent):
    """
    Make every move action of the agent reset its field of view.

    For each action registered in ``self.moveActions[agent.name]`` the
    agent's ``vicInFOV`` feature is set to ``'none'``.

    :param agent: the agent whose move actions are updated.
    """
    fovKey = stateKey(agent.name, 'vicInFOV')
    # iterate over the registered move actions rather than a hard-coded
    # count of 4, so that this follows whatever _makeMoveActions created
    for action in self.moveActions[agent.name]:
        # Reset FoV
        tree = setToConstantMatrix(fovKey, 'none')
        self.world.setDynamics(fovKey, action, makeTree(tree))
def _makeMoveActions(self, agent):
    """
    Create one move action per direction and store them, in iteration order
    of ``Directions``, in ``self.moveActions[agent.name]``.

    Legality: the agent's current location has a neighbor in the direction.
    Dynamics set for each action:
    1) the agent's location becomes the neighbor in that direction;
    2) the visit counter of the location moved to is incremented;
    3) world time is incremented by MOVE_TIME_INC.

    :param agent: the agent that receives the move actions.
    """
    loc_key = stateKey(agent.name, 'loc')
    created = []
    self.moveActions[agent.name] = created

    for direction in Directions:
        nbr_of = self.neighbors[direction.value]

        # legal if the current location has a neighbor in this direction
        origins = set(nbr_of.keys())
        legality = makeTree({
            'if': equalRow(loc_key, origins),
            True: True,
            False: False
        })
        action = agent.addAction({
            'verb': 'move',
            'object': direction.name
        }, legality)
        created.append(action)

        # location dynamics: branch on the current location (by index) and
        # set the location to the corresponding neighbor
        origin_list = list(origins)
        move_tree = {'if': equalRow(loc_key, origin_list)}
        move_tree.update(
            (idx, setToConstantMatrix(loc_key, nbr_of[origin]))
            for idx, origin in enumerate(origin_list))
        self.world.setDynamics(loc_key, action, makeTree(move_tree))

        # the move increments the counter of the location we moved to
        for dest in self.all_locations:
            visits_key = stateKey(agent.name, 'locvisits_' + str(dest))
            visits_tree = makeTree({
                'if': equalRow(makeFuture(loc_key), dest),
                True: incrementMatrix(visits_key, 1),
                False: noChangeMatrix(visits_key)
            })
            self.world.setDynamics(visits_key, action, visits_tree)

        # increment time
        time_tree = makeTree(incrementMatrix(self.world.time, MOVE_TIME_INC))
        self.world.setDynamics(self.world.time, action, time_tree)
def makeSearchAction(self, agent):
    """
    Create the agent's ``search`` action and store it in ``self.searchActs``.

    By default (any action) the agent's FOV feature is set to ``'none'``;
    under the search action it follows the tree returned by
    ``makeRandomFOVDistr`` and world time is incremented by SEARCH_TIME_INC.

    :param agent: the agent that receives the search action.
    """
    search = agent.addAction({'verb': 'search'})
    fov_key = stateKey(agent.name, FOV_FEATURE)

    # default: FOV is none
    default_fov = makeTree(setToConstantMatrix(fov_key, 'none'))
    self.world.setDynamics(fov_key, True, default_fov)

    # a victim can randomly appear in FOV
    random_fov = self.makeRandomFOVDistr(agent)
    self.world.setDynamics(fov_key, search, makeTree(random_fov))

    # increment time
    elapsed = makeTree(incrementMatrix(self.world.time, SEARCH_TIME_INC))
    self.world.setDynamics(self.world.time, search, elapsed)

    self.searchActs[agent.name] = search
def set_phase_dynamics(self):
    """
    Set the default dynamics that update the mission phase from the time.

    The time feature is tested against MISSION_PHASE_END_TIMES; branch ``i``
    sets the phase to ``MISSION_PHASES[i]`` and the branch past the last end
    time sets it to the last phase.
    """
    num_end_times = len(MISSION_PHASE_END_TIMES)
    phase_tree = {
        'if': thresholdRow(self.time, MISSION_PHASE_END_TIMES),
        # past the last end time: stay in the final phase
        num_end_times: setToConstantMatrix(self.phase, MISSION_PHASES[-1])
    }
    phase_tree.update(
        (idx, setToConstantMatrix(self.phase, MISSION_PHASES[idx]))
        for idx in range(num_end_times))
    self.setDynamics(self.phase, True, makeTree(phase_tree))
def set_reward(self, agent, weight, model=None):
    """
    Set a reward for the agent based on the visit count of its current location.

    The reward tree branches on the location feature; for each location the
    reward is set to that location's visit count or, when ``self.inverse`` is
    set, to the time feature minus the visit count. It is left unchanged when
    no location matches.

    :param agent: the agent receiving the reward.
    :param weight: reward weight, scaled by ``self.normalize_factor``.
    :param model: the agent model to set the reward for, passed to ``setReward``.
    """
    rwd_feat = rewardKey(agent.name)

    # compares agent's current location against all locations
    branches = {
        'if': equalRow(self.location_feat, self.all_locations),
        None: noChangeMatrix(rwd_feat)
    }

    # one leaf per location, using that location's visitation count
    for idx, loc in enumerate(self.all_locations):
        visits_feat = get_num_visits_location_key(agent, loc)
        if self.inverse:
            leaf = dynamicsMatrix(rwd_feat, {self.time_feat: 1., visits_feat: -1.})
        else:
            leaf = setToFeatureMatrix(rwd_feat, visits_feat)
        branches[idx] = leaf

    agent.setReward(makeTree(branches), weight * self.normalize_factor, model)
def _createSensorDyn(self, human):
    """
    Set the default dynamics of the agent's per-direction sensor features.

    For each direction, the ``sensor_<direction>`` feature depends on the
    agent's future location: when that location has a neighbor in the
    direction, the value comes from ``_sense1Location`` for that neighbor;
    otherwise it is set to ``'none'``.

    :param human: the agent whose sensor dynamics are created.
    """
    for d in Directions:
        sensor_key = stateKey(human.name, 'sensor_' + d.name)
        nbr_of = self.world_map.neighbors[d.value]
        origins = list(nbr_of.keys())
        next_loc = makeFuture(stateKey(human.name, 'loc'))

        sensor_tree = {
            'if': equalRow(next_loc, origins),
            # no neighbor in this direction
            None: setToConstantMatrix(sensor_key, 'none')
        }
        sensor_tree.update(
            (idx, self._sense1Location(sensor_key, nbr_of[origin]))
            for idx, origin in enumerate(origins))

        self.world.setDynamics(sensor_key, True, makeTree(sensor_tree))
def set_reward(self, agent, weight, model=None):
    """
    Set a binary reward for the agent based on visits to its current location.

    The reward tree branches on the location feature; for each location the
    reward is 1 when the threshold test of its visit count against 1 holds
    and 0 otherwise. It is left unchanged when no location matches.

    :param agent: the agent receiving the reward.
    :param weight: reward weight, scaled by ``self.normalize_factor``.
    :param model: the agent model to set the reward for, passed to ``setReward``.
    """
    rwd_feat = rewardKey(agent.name)

    def visited_leaf(visits_feat):
        # binary value according to the visitation count of a location
        return {
            'if': thresholdRow(visits_feat, 1),
            True: setToConstantMatrix(rwd_feat, 1),
            False: setToConstantMatrix(rwd_feat, 0)
        }

    # compares agent's current location against all locations
    branches = {
        'if': equalRow(self.location_feat, self.all_locations),
        None: noChangeMatrix(rwd_feat)
    }
    for idx, loc in enumerate(self.all_locations):
        branches[idx] = visited_leaf(get_num_visits_location_key(agent, loc))

    agent.setReward(makeTree(branches), weight * self.normalize_factor, model)
def stochasticTriageDur(self):
    """
    Make the duration of triage actions stochastic.

    For every color other than white and red, the time dynamics of each
    agent's triage action of that color become a distribution over time
    increments, taken from ``self.color_reqd_times[color]`` (a mapping from
    increment to probability).
    """
    skipped = {WHITE_STR, RED_STR}
    for clr in self.color_names:
        if clr in skipped:
            continue

        durations = self.color_reqd_times[clr]
        stochTree = {
            'distribution': [(incrementMatrix(self.world.time, inc), prob)
                             for inc, prob in durations.items()]
        }
        for agent_acts in self.triageActs.values():
            self.world.setDynamics(self.world.time, agent_acts[clr],
                                   makeTree(stochTree))
def get_reward_tree(agent, my_side, other_side):
    """
    Build the reward tree of an agent from both agents' chosen sides.

    The reward is INVALID while either agent has not decided; otherwise it is
    SAME_SIDE_RWD when both sides are equal and DIFF_SIDES_RWD when they differ.

    :param agent: the agent whose reward feature is set.
    :param my_side: the agent's own side feature.
    :param other_side: the other agent's side feature.
    :return: the reward tree.
    """
    reward_key = rewardKey(agent.name)
    return makeTree({
        'if': equalRow(my_side, NOT_DECIDED),  # if I have not decided
        True: setToConstantMatrix(reward_key, INVALID),
        False: {
            # INVALID is used here as a reward value; an undecided side is
            # marked with NOT_DECIDED, exactly as in the test on my_side
            'if': equalRow(other_side, NOT_DECIDED),  # if other has not decided
            True: setToConstantMatrix(reward_key, INVALID),
            False: {
                'if': equalFeatureRow(my_side, other_side),  # if my_side == other_side
                True: setToConstantMatrix(reward_key, SAME_SIDE_RWD),
                False: setToConstantMatrix(reward_key, DIFF_SIDES_RWD)
            }
        }
    })
def makeVictimReward(self, agent, model=None, rwd_dict=None):
    """
    Set the agent's reward as a weighted sum of its ``saved_<color>`` features.

    The weight of each color comes from ``rwd_dict`` when it lists the color,
    otherwise from COLOR_REWARDS; colors without a weight or with a weight of
    zero are left out.

    :param agent: the agent receiving the reward.
    :param model: the agent model to set the reward for, passed to ``setReward``.
    :param rwd_dict: optional per-color rewards overriding COLOR_REWARDS.
    """
    weights = {}
    for color in self.color_names:
        # per-call override first, then the default table
        if rwd_dict is not None and color in rwd_dict:
            rwd = rwd_dict[color]
        elif color in COLOR_REWARDS:
            rwd = COLOR_REWARDS[color]
        else:
            rwd = None

        if rwd is not None and rwd != 0:
            weights[stateKey(agent.name, 'saved_' + color)] = rwd

    reward_tree = makeTree(dynamicsMatrix(rewardKey(agent.name), weights))
    agent.setReward(reward_tree, 1., model)
def test_actions_legal_const(self):
    """
    Check action legality derived from constraints on a constant.

    With ``q`` fixed to 1, the constraint on ``a1`` is expected to give it a
    constant-False legality tree while ``a2`` gets no legality entry and
    remains a legal action; one step is then expected to decrease ``p`` by 2.
    """
    rddl = ''' domain my_test { pvariables { p : { state-fluent, int, default = 0 }; q : { non-fluent, int, default = 1 }; a1 : { action-fluent, bool, default = false }; a2 : { action-fluent, bool, default = false }; }; cpfs { p' = if (a1') then p + 2 else if (a2') then p - 2 else 100; }; reward = p; state-action-constraints { a1 => q > 1; a2 => q <= 1; }; } non-fluents my_test_empty { domain = my_test; } instance my_test_inst { domain = my_test; init-state { a1; }; horizon = 3; } '''
    conv = Converter(const_as_assert=True)
    conv.convert_str(rddl)

    ag_name, agent = next(iter(conv.world.agents.items()))
    agent_actions = conv.actions[ag_name]
    a1 = agent_actions['a1']
    a2 = agent_actions['a2']

    # a1 is always illegal, a2 has no legality restriction
    self.assertIn(a1, agent.legal)
    self.assertEqual(agent.legal[a1], makeTree(False))
    self.assertNotIn(a2, agent.legal)
    self.assertIn(a2, conv.world.agents[ag_name].getLegalActions())

    p_before = conv.world.getState(WORLD, 'p', unique=True)
    self.assertEqual(p_before, 0)

    conv.world.step()

    p_after = conv.world.getState(WORLD, 'p', unique=True)
    self.assertEqual(p_after, p_before - 2)
def _get_action_legality(
        self, expr: Dict
) -> Union[Tuple[Agent, ActionSet, KeyedTree], None, bool]:
    """
    Extract an action legality constraint from a constraint expression.

    :param expr: the constraint expression.
    :return: a tuple ``(agent, action, legality tree)`` for constraints of
        the form "action => condition" or "not action"; True when the
        expression is just an action or a true constant (always legal); None
        when no action legality constraint could be found.
    """
    # check for action legality constraint in the form "action => constraint"
    if 'imply' in expr:
        antecedent = expr['imply'][0]
        if 'action' in antecedent:
            agent = antecedent['action'][0]
            action = antecedent['action'][1]
            # get the condition expression as an 'if' legality tree
            condition = self._get_if_tree(expr['imply'][1],
                                          {CONSTANT: True},
                                          {CONSTANT: False})
            return agent, action, self._get_legality_tree(condition)

    # constraint is true or only depends on the action: no need to set
    # legality (always legal)
    if 'action' in expr or _get_const_val(expr, bool):
        return True

    # "not action": probably the rhs of an implication was False, so the
    # action is always illegal
    if 'not' in expr and 'action' in expr['not']:
        negated = expr['not']['action']
        return negated[0], negated[1], makeTree(False)

    # could not find an action legality constraint in the expression
    return None
def _createTriageAction(self, agent, color):
    """
    Create the agent's triage action for victims of the given color and
    store it in ``self.triageActs[agent.name][color]``.

    Legality: the counter of victims of this color at the agent's current
    location passes the threshold test against 0.
    Dynamics (a triage succeeds when the time difference test ``long_enough``
    holds): the location's counter of this color is decremented and its white
    counter incremented; the agent's ``numsaved_<color>`` counter is
    incremented; ``saved_<color>`` is set from the change in that counter
    (and to False by default); world time is incremented by the threshold.

    :param agent: the agent that receives the action.
    :param color: the victim color the action applies to.
    """
    locations = self.world_map.all_locations
    loc_key = stateKey(agent.name, 'loc')

    def counter_key(loc, clr):
        # world feature counting victims of a color at a location
        return stateKey(WORLD, 'ctr_' + loc + '_' + clr)

    # legal only if any "active" victim of the given color is in the same loc
    legality = {'if': equalRow(loc_key, locations), None: False}
    for idx, loc in enumerate(locations):
        legality[idx] = {
            'if': thresholdRow(counter_key(loc, color), 0),
            True: True,
            False: False
        }
    action = agent.addAction({'verb': 'triage_' + color}, makeTree(legality))

    # different triage time thresholds according to victim type
    if color == GREEN_STR:
        threshold = 7
    else:
        threshold = 14
    long_enough = differenceRow(makeFuture(self.world.time), self.world.time,
                                threshold)

    # make triage dynamics for counters of each loc
    for loc in locations:
        # successful triage conditions
        conds = [equalRow(loc_key, loc), long_enough]

        # if successful: decrement the counter of this color, increment white
        for clr, delta in ((color, -1), (WHITE_STR, 1)):
            ctr = counter_key(loc, clr)
            ctr_tree = makeTree(
                anding(conds, incrementMatrix(ctr, delta), noChangeMatrix(ctr)))
            self.world.setDynamics(ctr, action, ctr_tree)

    # color saved counter: increment
    numsaved_key = stateKey(agent.name, 'numsaved_' + color)
    numsaved_tree = {
        'if': long_enough,
        True: incrementMatrix(numsaved_key, 1),
        False: noChangeMatrix(numsaved_key)
    }
    self.world.setDynamics(numsaved_key, action, makeTree(numsaved_tree))

    # color saved: according to the difference in the saved counter
    counter_delta = {makeFuture(numsaved_key): 1, numsaved_key: -1}
    saved_key = stateKey(agent.name, 'saved_' + color)
    self.world.setDynamics(saved_key, action,
                           makeTree(dynamicsMatrix(saved_key, counter_delta)))
    self.world.setDynamics(
        saved_key, True,
        makeTree(setFalseMatrix(saved_key)))  # default: set to False

    # increment time
    self.world.setDynamics(
        self.world.time, action,
        makeTree(incrementMatrix(self.world.time, threshold)))

    self.triageActs[agent.name][color] = action
def setup():
    """
    Create the world according to the module-level ``args``.

    Adds ``args.agents`` agents, each with ``args.features_agent`` randomly
    initialized integer features, a reward maximizing one randomly chosen
    feature, a static mental copy of its true model and ``args.actions``
    actions whose dynamics set a feature from groups of
    ``args.features_action`` subsequent features. All agents act in parallel,
    each one holds a belief over one randomly chosen feature, and each pair
    of agents models the other through its copy model.

    :return: the created world.
    """
    np.random.seed(args.seed)

    # create world and add agents
    world = World()
    world.memory = False
    world.parallel = args.parallel
    agents = []
    agent_features = {}
    group_size = args.features_action
    for agent_idx in range(args.agents):
        agent = Agent('Agent' + str(agent_idx))
        world.addAgent(agent)
        agents.append(agent)

        # set agent's params
        agent.setAttribute('discount', 1)
        agent.setHorizon(args.horizon)

        # add features, initialize at random
        features = []
        agent_features[agent] = features
        for feat_idx in range(args.features_agent):
            feat = world.defineState(agent.name, 'Feature{}'.format(feat_idx),
                                     int, lo=0, hi=1000)
            world.setFeature(feat, np.random.randint(0, MAX_FEATURE_VALUE))
            features.append(feat)

        # set random reward function
        agent.setReward(maximizeFeature(np.random.choice(features), agent.name), 1)

        # add mental copy of true model and make it static (we do not have
        # beliefs in the models)
        fake_model = get_fake_model_name(agent)
        agent.addModel(fake_model, parent=get_true_model_name(agent))
        agent.setAttribute('static', True, get_fake_model_name(agent))

        # add actions
        for act_idx in range(args.actions):
            action = agent.addAction({'verb': '', 'action': 'Action{}'.format(act_idx)})
            start = act_idx
            while start + group_size < args.features_agent:
                target = features[start]
                weights = {features[start + offset + 1]: 1
                           for offset in range(group_size)}
                world.setDynamics(target, action,
                                  makeTree(multi_set_matrix(target, weights)))
                start += group_size

    # define order
    world.setOrder([{ag.name for ag in agents}])

    for agent in agents:
        # test belief update:
        # - set a belief in one feature to the actual initial value (should
        #   not change outcomes)
        # world.setModel(agent.name, Distribution({True: 1.0}))
        rand_feat = np.random.choice(agent_features[agent])
        agent.setBelief(rand_feat, world.getValue(rand_feat))
        print('{} will always observe {}={}'.format(agent.name, rand_feat,
                                                    world.getValue(rand_feat)))

    # set mental model of each agent in all other agents
    for idx, first in enumerate(agents):
        for second in agents[idx + 1:]:
            world.setMentalModel(first.name, second.name,
                                 Distribution({get_fake_model_name(second): 1}))
            world.setMentalModel(second.name, first.name,
                                 Distribution({get_fake_model_name(first): 1}))

    return world
NUM_BINS = 11
NUM_SAMPLES = 100

if __name__ == '__main__':
    # create world and add agent
    world = World()
    agent = Agent('Agent')
    world.addAgent(agent)

    # add variable
    feat = world.defineState(agent.name, 'x', float, lo=LOW, hi=HIGH)

    # add single action that discretizes the feature
    action = agent.addAction({'verb': '', 'action': 'discretize'})
    tree = makeTree(discretization_tree(world, feat, NUM_BINS))
    world.setDynamics(feat, action, tree)

    world.setOrder([{agent.name}])

    # report the discretization settings
    print('====================================')
    print(f'High:\t{HIGH}')
    print(f'Low:\t{LOW}')
    print(f'Bins:\t{NUM_BINS}')

    print('\nSamples/steps:')
    values_original = []
    values_discrete = []
    for i in range(NUM_SAMPLES):
        # draw a random value within the feature's range and set it
        num = np.random.uniform(LOW, HIGH)
        world.setFeature(feat, num)
agents = [agent1, agent2]
for agent in agents:
    # set agent's params
    agent.setAttribute('discount', 1)
    agent.setHorizon(1)
    agent.setAttribute('selection', TIEBREAK)

    # add 'side chosen' variable (didn't decide, went left or went right)
    side = world.defineState(agent.name, 'side', list,
                             [NOT_DECIDED, WENT_LEFT, WENT_RIGHT])
    world.setFeature(side, NOT_DECIDED)
    sides.append(side)

    # define agents' actions (left and right): each one sets the side
    # variable to the corresponding value
    for label, outcome, registry in (('go left', WENT_LEFT, lefts),
                                     ('go right', WENT_RIGHT, rights)):
        action = agent.addAction({'verb': '', 'action': label})
        tree = makeTree(setToConstantMatrix(side, outcome))
        world.setDynamics(side, action, tree)
        registry.append(action)

    # create a new model for the agent
    agent.addModel(get_fake_model_name(agent), parent=agent.get_true_model())

# defines payoff matrices (each agent sees its own side first)
agent1.setReward(get_reward_tree(agent1, sides[0], sides[1]), 1)
agent2.setReward(get_reward_tree(agent2, sides[1], sides[0]), 1)
var_counter = world.defineState(agent1.name, 'counter', int, lo=0, hi=3)
var_copy = world.defineState(agent2.name, 'counter_copy', int, lo=0, hi=3)

# define first agent's action (counter increment): counter <- counter + 1
action = agent1.addAction({'verb': '', 'action': 'increment'})
increment_weights = {var_counter: 1, CONSTANT: 1}
tree = makeTree(multi_set_matrix(var_counter, increment_weights))
world.setDynamics(var_counter, action, tree)

# define second agent's action (var is copy from counter)
action = agent2.addAction({'verb': '', 'action': 'copy'})
tree = makeTree(setToFeatureMatrix(var_copy, var_counter))
world.setDynamics(var_copy, action, tree)

world.setOrder(turn_order)

# resets vars
for var in (var_copy, var_counter):
    world.setFeature(var, 0)
# define agents' actions inspired on TIT-FOR-TAT: first decision is open, then retaliate non-cooperation. # as soon as one agent defects it will always defect from there on for i, agent in enumerate(agents): my_dec = agents_dec[i] other_dec = agents_dec[0 if i == 1 else 1] # defect (not legal if other has cooperated before, legal only if agent itself did not defect before) action = agent.addAction({ 'verb': '', 'action': 'defect' }, makeTree({ 'if': equalRow(other_dec, COOPERATED), True: { 'if': equalRow(my_dec, DEFECTED), True: True, False: False }, False: True })) tree = makeTree(setToConstantMatrix(my_dec, DEFECTED)) world.setDynamics(my_dec, action, tree) # cooperate (not legal if other or agent itself defected before) action = agent.addAction({ 'verb': '', 'action': 'cooperate' }, makeTree({ 'if': equalRow(other_dec, DEFECTED), True: False,
lo=0, hi=100) world.setFeature(var_ask_amnt, 0) var_rcv_amnt = world.defineState(ag_consumer.name, 'received amount', int, lo=0, hi=100) world.setFeature(var_rcv_amnt, 0) # add producer actions # produce capacity: if half capacity then 0.5*asked amount else asked amount) act_prod = ag_producer.addAction({'verb': '', 'action': 'produce'}) tree = makeTree({ 'if': equalRow(var_half_cap, True), True: multi_set_matrix(var_rcv_amnt, {var_ask_amnt: 0.5}), False: setToFeatureMatrix(var_rcv_amnt, var_ask_amnt) }) world.setDynamics(var_rcv_amnt, act_prod, tree) # add consumer actions (ask more = 10 / less = 5) act_ask_more = ag_consumer.addAction({'verb': '', 'action': 'ask_more'}) tree = makeTree(setToConstantMatrix(var_ask_amnt, 10)) world.setDynamics(var_ask_amnt, act_ask_more, tree) act_ask_less = ag_consumer.addAction({'verb': '', 'action': 'ask_less'}) tree = makeTree(setToConstantMatrix(var_ask_amnt, 5)) world.setDynamics(var_ask_amnt, act_ask_less, tree) # defines payoff for consumer agent: if received amount > 5 then 10 - rcv_amnt (penalty) else rcv_amount (reward) # this simulates over-stock cost, best is to receive max of 5, more than this has costs
# create world and add agent
world = World()
agent = Agent('Agent')
world.addAgent(agent)

# set parameters
agent.setAttribute('discount', DISCOUNT)
agent.setHorizon(HORIZON)

# add position variable
pos = world.defineState(agent.name, 'position', int, lo=-100, hi=100)
world.setFeature(pos, 0)

# define agents' actions (stay 0, left -1 and right +1)
for destination, effect in (('nowhere', setToFeatureMatrix(pos, pos)),
                            ('left', incrementMatrix(pos, -1)),
                            ('right', incrementMatrix(pos, 1))):
    action = agent.addAction({'verb': 'move', 'action': destination})
    tree = makeTree(effect)
    world.setDynamics(pos, action, tree)

# define rewards (maximize position, i.e., always go right)
agent.setReward(maximizeFeature(pos, agent.name), 1)

# set order
world.setOrder([agent.name])

# agent has initial beliefs about its position, which will be updated after executing actions
for agent in agents: # set agent's params agent.setAttribute('discount', 1) agent.setAttribute('selection', TIEBREAK) agent.setHorizon(1) # agent.setRecursiveLevel(1) # add "decision" variable (0 = didn't decide, 1 = went straight, 2 = swerved) dec = world.defineState(agent.name, 'decision', list, [NOT_DECIDED, WENT_STRAIGHT, SWERVED]) world.setFeature(dec, NOT_DECIDED) agents_dec.append(dec) # define agents' actions (defect and cooperate) action = agent.addAction({'verb': '', 'action': 'go straight'}) tree = makeTree(setToConstantMatrix(dec, WENT_STRAIGHT)) world.setDynamics(dec, action, tree) action = agent.addAction({'verb': '', 'action': 'swerve'}) tree = makeTree(setToConstantMatrix(dec, SWERVED)) world.setDynamics(dec, action, tree) # defines payoff matrices agent1.setReward(get_reward_tree(agent1, agents_dec[0], agents_dec[1]), 1) agent2.setReward(get_reward_tree(agent2, agents_dec[1], agents_dec[0]), 1) # define order my_turn_order = [{agent1.name, agent2.name}] world.setOrder(my_turn_order) # add true mental model of the other to each agent world.setMentalModel(agent1.name, agent2.name,
def _createTriageAction(self, agent, color):
    """
    Create the agent's triage action for victims of the given color and
    store it in ``self.triageActs[agent.name][color]``.

    Legality: the agent's FOV feature equals the color.
    Dynamics (a triage succeeds when the time difference test ``longEnough``
    holds): at the agent's location, and with the color still in FOV, the
    counter of this color is decremented and the white counter incremented;
    the FOV becomes white; the agent's ``numsaved_<color>`` counter is
    incremented; ``saved_<color>`` is set from the change in that counter
    (and to False by default); world time is incremented by the threshold.

    :param agent: the agent that receives the action.
    :param color: the victim color the action applies to.
    """
    fov_key = stateKey(agent.name, FOV_FEATURE)
    loc_key = stateKey(agent.name, 'loc')

    # legal only when a victim of this color is in the field of view
    in_fov = {'if': equalRow(fov_key, color), True: True, False: False}
    action = agent.addAction({'verb': 'triage_' + color}, makeTree(in_fov))

    # triage time threshold according to victim type
    threshold = 7 if color == GREEN_STR else 14
    longEnough = differenceRow(makeFuture(self.world.time), self.world.time,
                               threshold)

    for loc in self.world_map.all_locations:
        # successful triage conditions
        conds = [
            equalRow(fov_key, color),
            equalRow(loc_key, loc),
            longEnough
        ]

        # location-specific counters: if successful, decrement the counter
        # of this color and increment the white one
        for clr, delta in ((color, -1), (WHITE_STR, 1)):
            ctr = stateKey(WORLD, 'ctr_' + loc + '_' + clr)
            ctr_tree = makeTree(
                anding(conds, incrementMatrix(ctr, delta), noChangeMatrix(ctr)))
            self.world.setDynamics(ctr, action, ctr_tree)

    # FOV update to white
    fov_tree = {
        'if': longEnough,
        True: setToConstantMatrix(fov_key, WHITE_STR),
        False: noChangeMatrix(fov_key)
    }
    self.world.setDynamics(fov_key, action, makeTree(fov_tree))

    # color saved counter: increment
    numsaved_key = stateKey(agent.name, 'numsaved_' + color)
    numsaved_tree = {
        'if': longEnough,
        True: incrementMatrix(numsaved_key, 1),
        False: noChangeMatrix(numsaved_key)
    }
    self.world.setDynamics(numsaved_key, action, makeTree(numsaved_tree))

    # color saved: according to the difference in the saved counter
    counter_delta = {makeFuture(numsaved_key): 1, numsaved_key: -1}
    saved_key = stateKey(agent.name, 'saved_' + color)
    self.world.setDynamics(saved_key, action,
                           makeTree(dynamicsMatrix(saved_key, counter_delta)))
    self.world.setDynamics(
        saved_key, True,
        makeTree(setFalseMatrix(saved_key)))  # default: set to False

    # increment time
    self.world.setDynamics(
        self.world.time, action,
        makeTree(incrementMatrix(self.world.time, threshold)))

    self.triageActs[agent.name][color] = action