def setBeliefsNoVics(world, agent, triageAgent):
    # Get the canonical name of the "true" player model
    trueTriageModel = next(iter(triageAgent.models.keys()))

    # Agent does not model itself
    agent.resetBelief(ignore={modelKey(agent.name)})

    # Triager does not model victims or the ASIST agent
    dontBelieve = set([modelKey(agent.name)] +
                      [key for key in world.state.keys() if key.startswith('victim')])
    triageAgent.resetBelief(ignore=dontBelieve)

    # Agent starts with uniform distribution over triageAgent MMs
    triageAgent.addModel('myopicMod', horizon=2, parent=trueTriageModel,
                         rationality=.8, selection='distribution')
    triageAgent.addModel('strategicMod', horizon=4, parent=trueTriageModel,
                         rationality=.8, selection='distribution')
    world.setMentalModel(agent.name, triageAgent.name,
                         Distribution({'myopicMod': 0.5, 'strategicMod': 0.5}))

    # Agent observes everything except triageAgent's reward received and true models
    agent.omega = {key for key in world.state.keys()
                   if key not in {modelKey(triageAgent.name), modelKey(agent.name)}}  # rewardKey(triageAgent.name),
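# Usage sketch for setBeliefsNoVics. The `world`, observing `agent`, and
# `triageAgent` arguments are assumed to have been built elsewhere (e.g., by
# make_single_player_world below); this wrapper and its print-out are purely
# illustrative, not part of the original module.
def example_set_beliefs(world, agent, triageAgent):
    setBeliefsNoVics(world, agent, triageAgent)
    # The observer now holds a 50/50 prior over the two triageAgent models,
    # which subsequent world.step() calls will update from observed actions.
    belief = next(iter(agent.getBelief().values()))
    print(world.getFeature(modelKey(triageAgent.name), belief))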
def set_player_models(world, observer_name, player_name, victims, param_list):
    """
    :param world: the PsychSim World
    :type world: World
    :param observer_name: the name of the agent whose beliefs we will be specifying
    :type observer_name: str
    :param player_name: the name of the player agent to be modeled
    :type player_name: str
    :param victims: specification of victims
    :type victims: Victims
    :param param_list: list of dictionaries of model parameter specifications
    :type param_list: List[Dict]
    """
    observer = world.agents[observer_name]
    player = world.agents[player_name]

    # observer does not model itself
    observer.resetBelief(ignore={modelKey(observer.name)})

    # get the canonical name of the "true" player model
    true_model = player.get_true_model()

    for param_dict in param_list:
        model_name = param_dict['name']
        if model_name != true_model:
            player.addModel(model_name, parent=true_model,
                            horizon=param_dict.get('horizon', 2),
                            rationality=param_dict.get('rationality', 0.5),
                            selection=param_dict.get('selection', 'distribution'))
        if isinstance(next(iter(param_dict['reward'].keys())), str):
            victims.makeVictimReward(player, model_name, param_dict['reward'])
        else:
            for feature, weight in param_dict['reward'].items():
                feature.set_reward(player, weight, model_name)
        beliefs = player.resetBelief(model=model_name, ignore={modelKey(observer.name)})

    # observer has uniform prior distribution over possible player models
    if len(player.models) > 1:
        world.setMentalModel(observer.name, player.name,
                             Distribution({param_dict['name']: 1. / (len(player.models) - 1)
                                           for param_dict in param_list}))

    # observer sees everything except true models
    observer.omega = [key for key in world.state.keys()
                      if key not in {modelKey(player.name), modelKey(observer.name)}]  # rewardKey(player.name),
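# Example (sketch) of a param_list for set_player_models. String keys in
# 'reward' route through victims.makeVictimReward above; the model names,
# victim colors, and weights here are illustrative assumptions, not values
# mandated by the API.
example_param_list = [
    {'name': 'player_myopic', 'horizon': 2, 'rationality': 0.5,
     'selection': 'distribution', 'reward': {'Green': 1., 'Gold': 1.}},
    {'name': 'player_strategic', 'horizon': 4, 'rationality': 0.5,
     'selection': 'distribution', 'reward': {'Green': 1., 'Gold': 3.}},
]
# assumes `world` and `victims` from the scenario setup, with 'ATOMIC' as the
# observer and 'Player1' as the player:
# set_player_models(world, 'ATOMIC', 'Player1', victims, example_param_list)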
def post_step(self, world, act):
    t = world.getState(WORLD, 'seconds', unique=True)
    if len(self.model_data) == 0 or self.model_data[-1]['Timestep'] != t:
        # Haven't made an inference for this timestep yet (maybe wait until the last one?)
        player_name = self.player_name()
        player = world.agents[player_name]
        agent = world.agents['ATOMIC']

        # Store beliefs over player models
        beliefs = agent.getBelief()
        if len(beliefs) > 1:
            raise RuntimeError('Agent {} has {} possible models in true state'.format(agent.name, len(beliefs)))
        beliefs = next(iter(beliefs.values()))
        player_model = world.getFeature(modelKey(player_name), beliefs)
        for model in player_model.domain():
            entry = {'Timestep': t, 'Belief': player_model[model]}
            # Find root model (i.e., remove the auto-generated numbers from the name)
            while player.models[player.models[model]['parent']]['parent'] is not None:
                model = player.models[model]['parent']
            entry['Model'] = model[len(player_name) + 1:]
            self.model_data.append(entry)

        if self.condition_dist:
            condition_dist = Distribution()
            for model, model_prob in player_model.items():
                for condition, condition_prob in self.condition_dist[model_to_cluster(model)].items():
                    condition_dist.addProb(condition, model_prob * condition_prob)
            condition_dist.normalize()
            for condition, condition_prob in condition_dist.items():
                self.condition_data.append({'Timestep': t, 'Belief': condition_prob, 'Condition': condition})
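# Sketch of what the root-model walk above does. PsychSim clones models during
# belief updates, appending auto-generated suffixes; walking `parent` links
# until the grandparent is None recovers the hand-authored model, whose name is
# then stripped of the player-name prefix. The concrete names below are
# illustrative assumptions about the naming convention, not logged output:
#
#     'Player1_myopic_0038'  ->  parent 'Player1_myopic'  ->  parent = true model
#     entry['Model'] = 'myopic'   # after stripping the 'Player1_' prefix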
def set_action_legality(agent, action, legality=True, models=None):
    """
    Sets the legality of an action for the given agent and model(s).
    :param Agent agent: the agent whose model(s) we want to set the action legality.
    :param ActionSet action: the action for which to set the legality.
    :param bool legality: whether to set this action legal (True) or illegal (False).
    :param list[str] models: the list of models for which to set the action legality.
        None sets the legality on the agent's "true" model.
    """
    # no models given: set the legality on the agent's "true" model
    if models is None or len(models) == 0:
        agent.setLegal(action, makeTree(legality))
        return

    model_key = modelKey(agent.name)

    # initial tree (end condition is: 'not legality')
    tree = not legality

    # iteratively nests the legality tree, comparing the model's key against the index of each model in the state vector
    for model in models:
        tree = {'if': equalRow(model_key, agent.model2index(model)),
                True: legality,
                False: tree}
    agent.setLegal(action, makeTree(tree))
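# Usage sketch: restricting an action to two hand-picked models. `agent` and
# `action` are assumed to come from the scenario setup (e.g., an ActionSet
# created by victims.createTriageActions); the model names are illustrative.
def example_restrict_action(agent, action):
    # legal only under these two models; the tree bottoms out at `not legality`,
    # so the action is illegal under every other model
    set_action_legality(agent, action, legality=True,
                        models=['myopicMod', 'strategicMod'])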
def make_single_player_world(player_name, init_loc, loc_neighbors, victims_color_locs,
                             use_unobserved=True, full_obs=False, light_neighbors={},
                             create_observer=True, logger=logging):
    # create world and map
    world = SearchAndRescueWorld()
    world_map = WorldMap(world, loc_neighbors, light_neighbors)

    # create victims info
    victims = Victims(world, victims_color_locs, world_map, full_obs=full_obs,
                      color_prior_p=COLOR_PRIOR_P, color_fov_p=COLOR_FOV_P,
                      color_reqd_times=COLOR_REQD_TIMES)

    # create (single) triage agent
    triage_agent = world.addAgent(player_name)
    world_map.makePlayerLocation(triage_agent, init_loc)
    victims.setupTriager(triage_agent)
    world_map.makeMoveResetFOV(triage_agent)
    victims.createTriageActions(triage_agent)
    if not full_obs:
        if use_unobserved:
            logger.debug('Start to make observable variables and priors')
            victims.createObsVars4Victims(triage_agent)
        logger.debug('Made observable variables and priors')
    victims.makeSearchAction(triage_agent)
    logger.debug('Made actions for triage agent: {}'.format(triage_agent.name))
    triage_agent.setReward(makeTree(setToConstantMatrix(rewardKey(triage_agent.name), 0)))  # dummy reward

    # after all agents are created
    victims.makeExpiryDynamics()
    victims.stochasticTriageDur()

    world.setOrder([{triage_agent.name}])

    # observer agent
    observer = make_observer(world, [triage_agent.name], OBSERVER_NAME) if create_observer else None

    # adjust agent's beliefs and observations
    triage_agent.resetBelief()
    triage_agent.omega = [key for key in world.state.keys()
                          if not ((key in {modelKey(observer.name if observer is not None else ''),
                                           rewardKey(triage_agent.name)}) or
                                  (key.find('unobs') > -1))]

    return world, triage_agent, observer, victims, world_map
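# Sketch: building a minimal two-room world with the helper above. The
# adjacency encoding (room -> {direction index: neighbor}) and the victim map
# (room -> color) are assumptions about what WorldMap and Victims expect here;
# adjust them to the real map format and color names.
def example_two_room_world():
    neighbors = {'R1': {0: 'R2'}, 'R2': {2: 'R1'}}  # toy two-room layout
    victims_locs = {'R2': 'Green'}                  # one green victim in R2
    return make_single_player_world('Player1', 'R1', neighbors, victims_locs,
                                    use_unobserved=True, full_obs=False)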
def runMMBelUpdate(world, agent, triageAgent, actions, Locations):
    for action in actions:
        if type(action) == psychsim.action.ActionSet:
            print('===Agent action: %s' % (action))
            world.step(action)
            beliefs = agent.getBelief()
            print('len(beliefs)', len(beliefs))
            assert len(beliefs) == 1  # because we are dealing with a known-identity agent
            belief = next(iter(agent.getBelief().values()))
            print('Agent now models player as:')
            key = modelKey(triageAgent.name)
            print(world.getFeature(key, belief))
        else:
            var, val = action
            print('===Setting feature', var, val)
            world.setState(triageAgent.name, var, val)
            print('--World state')
            world.printState(beliefs=False)
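# Usage sketch: a mixed action list for runMMBelUpdate. ActionSet entries
# trigger a world step plus a belief read-out; [feature, value] pairs just
# overwrite a state feature of the player. Picking an arbitrary action from
# triageAgent.actions and the 'loc' feature name are illustrative assumptions
# about the scenario's action and feature names.
def example_belief_update(world, agent, triageAgent, Locations):
    some_action = next(iter(triageAgent.actions))  # any ActionSet from the player's repertoire
    actions = [some_action, ['loc', Locations[0]], some_action]
    runMMBelUpdate(world, agent, triageAgent, actions, Locations)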
)
args = parser.parse_args()
# args.log_rewards = True

conv = Converter()
conv.convert_file(RDDL_FILE, verbose=True)

agents = set(conv.world.agents.keys())
for agent in conv.world.agents.values():
    agent.create_belief_state()
zeros = {name: agent.zero_level() for name, agent in conv.world.agents.items()}
for name, agent in conv.world.agents.items():
    beliefs = agent.getBelief()
    model = agent.get_true_model()
    belief = agent.getBelief(model=model)
    for other in agents - {name}:
        conv.world.setFeature(modelKey(other), zeros[other], belief)

################# S T E P   T H R O U G H
steps = 10
for i in range(steps):
    logging.info(f'\n__________________________________________________{i}')
    debug = {ag_name: {} for ag_name in conv.actions.keys()} if args.log_rewards else dict()
    conv.world.step(debug=debug, threshold=args.threshold, select=args.select)
    conv.log_state(log_actions=args.log_actions)
    if args.log_rewards:
        for ag_name in conv.actions.keys():
            _log_agent_reward(ag_name)
    conv.verify_constraints()
create_clear_dir(OUTPUT_DIR)

# sets up log to file
change_log_handler(os.path.join(OUTPUT_DIR, 'inference.log'), 2 if DEBUG else 1)

maps = get_default_maps()
if EXPT not in maps:
    raise NameError(f'Experiment "{EXPT}" is not implemented yet')

# create world, agent and observer
map_data = maps[EXPT]
world, agent, observer, victims, world_map = \
    make_single_player_world(AGENT_NAME, map_data.init_loc, map_data.adjacency,
                             map_data.victims, False, FULL_OBS)
agent.setAttribute('horizon', HORIZON)
agent.setAttribute('selection', AGENT_SELECTION)
agent.resetBelief(ignore={modelKey(observer.name)})
model_names = create_mental_models(world, agent, observer, victims)

# generates trajectory
logging.info('Generating trajectory of length {}...'.format(NUM_STEPS))
trajectory = generate_trajectory(agent, NUM_STEPS)
save_object(trajectory, os.path.join(OUTPUT_DIR, 'trajectory.pkl.gz'), True)

# gets evolution of inference over reward models of the agent
probs = track_reward_model_inference(trajectory, model_names, agent, observer,
                                     [stateKey(agent.name, 'loc')], verbose=False)

# create and save inference evolution plot
plot_evolution(probs.T, [_get_fancy_name(name) for name in model_names],
               'Evolution of Model Inference', None,
def printASISTBel(world, triageAgent, agent):
    belief = next(iter(agent.getBelief().values()))
    print('Agent now models player as:')
    key = modelKey(triageAgent.name)
    print(world.float2value(key, belief[key]))