def make_single_player_world(player_name, init_loc, loc_neighbors, victims_color_locs, use_unobserved=True,
                             full_obs=False, light_neighbors=None, create_observer=True, logger=logging):
    """
    Creates a search-and-rescue world with a single triage agent and, optionally, an observer agent.

    :param player_name: the name of the triage agent added to the world.
    :param init_loc: the initial location of the triage agent, as given to the world map.
    :param loc_neighbors: the location neighborhood information used to build the world map.
    :param victims_color_locs: the victim color/location information given to `Victims`.
    :param bool use_unobserved: whether to create the observable variables and priors for the victims
        (only has an effect when `full_obs` is `False`).
    :param bool full_obs: whether the victims are created under full observability.
    :param dict light_neighbors: the light neighborhood information used to build the world map.
        `None` (the default) is treated as an empty dictionary.
    :param bool create_observer: whether to create an observer agent for the triage agent.
    :param logger: the object whose `debug` method is used to log progress (the `logging` module by default).
    :rtype: tuple
    :return: the tuple `(world, triage_agent, observer, victims, world_map)`; `observer` is `None` when
        `create_observer` is `False`.
    """
    # build a fresh dictionary on every call so that no mutable default object is shared between calls
    if light_neighbors is None:
        light_neighbors = {}

    # create world and map
    world = SearchAndRescueWorld()
    world_map = WorldMap(world, loc_neighbors, light_neighbors)

    # create victims info
    victims = Victims(world, victims_color_locs, world_map, full_obs=full_obs,
                      color_prior_p=COLOR_PRIOR_P, color_fov_p=COLOR_FOV_P,
                      color_reqd_times=COLOR_REQD_TIMES)

    # create (single) triage agent
    triage_agent = world.addAgent(player_name)
    world_map.makePlayerLocation(triage_agent, init_loc)
    victims.setupTriager(triage_agent)
    world_map.makeMoveResetFOV(triage_agent)
    victims.createTriageActions(triage_agent)
    if not full_obs and use_unobserved:
        logger.debug('Start to make observable variables and priors')
        victims.createObsVars4Victims(triage_agent)
        logger.debug('Made observable variables and priors')
    # NOTE(review): the search action is assumed to be created regardless of observability -- confirm
    victims.makeSearchAction(triage_agent)
    logger.debug('Made actions for triage agent: {}'.format(triage_agent.name))
    triage_agent.setReward(
        makeTree(setToConstantMatrix(rewardKey(triage_agent.name), 0)))  # dummy reward

    # after all agents are created
    victims.makeExpiryDynamics()
    victims.stochasticTriageDur()

    world.setOrder([{triage_agent.name}])

    # observer agent
    observer = make_observer(world, [triage_agent.name], OBSERVER_NAME) if create_observer else None

    # adjust agent's beliefs and observations
    triage_agent.resetBelief()
    # keys excluded from the agent's observations; built once instead of once per state key
    excluded_keys = {
        modelKey(observer.name if observer is not None else ''),
        rewardKey(triage_agent.name),
    }
    triage_agent.omega = [key for key in world.state.keys()
                          if key not in excluded_keys and 'unobs' not in key]

    return world, triage_agent, observer, victims, world_map
def get_reward_tree(agent, my_dec, other_dec):
    """
    Builds the reward tree of an agent from its own decision and the decision of the other agent.
    The reward is set to INVALID while either decision equals NOT_DECIDED. Otherwise it is set to
    MUTUAL_COOP, SUCKER, TEMPTATION or PUNISHMENT, depending on which decisions equal COOPERATED.

    :param agent: the agent whose reward key is set by the tree.
    :param my_dec: the feature holding the agent's own decision.
    :param other_dec: the feature holding the other agent's decision.
    :return: the tree produced by `makeTree` for the nested decision structure.
    """
    rwd_key = rewardKey(agent.name)

    def _constant(value):
        # leaf setting the agent's reward to the given constant
        return setToConstantMatrix(rwd_key, value)

    # I cooperated: outcome depends on whether the other cooperated as well
    after_my_coop = {
        'if': equalRow(other_dec, COOPERATED),
        True: _constant(MUTUAL_COOP),  # both cooperated
        False: _constant(SUCKER),
    }

    # I did not cooperate: outcome depends on whether the other cooperated
    after_my_defect = {
        'if': equalRow(other_dec, COOPERATED),
        True: _constant(TEMPTATION),
        False: _constant(PUNISHMENT),
    }

    # both have decided: branch on my own decision
    both_decided = {
        'if': equalRow(my_dec, COOPERATED),
        True: after_my_coop,
        False: after_my_defect,
    }

    # I have decided: check whether the other has not decided yet
    i_decided = {
        'if': equalRow(other_dec, NOT_DECIDED),
        True: _constant(INVALID),
        False: both_decided,
    }

    return makeTree({
        'if': equalRow(my_dec, NOT_DECIDED),  # if I have not decided
        True: _constant(INVALID),
        False: i_decided,
    })
def set_constant_reward(agent, value):
    """
    Gets a matrix that sets the reward of the given agent to a constant value.

    :param Agent agent: the agent whose reward is to be set.
    :param float value: the constant value to which the reward is set.
    :rtype: KeyedMatrix
    :return: a matrix that sets the agent's reward key to the given constant value.
    """
    agent_rwd_key = rewardKey(agent.name)
    return setToConstantMatrix(agent_rwd_key, value)
def _convert_reward_function(self):
    """
    Sets the reward of every agent in the world from the reward expression of the model's domain.
    For each agent, the expression is converted, turned into a dynamics tree over the agent's
    reward key, and set as the agent's reward with weight 1.
    """
    # create reward function
    # TODO assume homogeneous agents
    logging.info('__________________________________________________')
    for agent in self.world.agents.values():
        # the expression is converted anew for each agent, with a fresh dependency set
        reward_expr = self._convert_expression(self.model.domain.reward, dependencies=set())
        agent_rwd_key = rewardKey(agent.name)
        reward_tree = self._get_dynamics_tree(agent_rwd_key, reward_expr)
        agent.setReward(reward_tree, 1.)
        logging.info(f'Set agent "{agent.name}" reward to:\n{reward_tree}')
def multi_reward_matrix(agent, scaled_keys):
    """
    Gets a matrix that sets the given agent's reward to a linear combination of the given keys.
    The combination itself is delegated to `multi_set_matrix`, applied to the agent's reward key.

    :param Agent agent: the agent whose reward is to be set.
    :param dict scaled_keys: the dictionary containing the weights (scalars) for each named key.
    :rtype: KeyedMatrix
    :return: a matrix performing the given linear combination on the agent's reward key.
    """
    agent_rwd_key = rewardKey(agent.name)
    return multi_set_matrix(agent_rwd_key, scaled_keys)
def set_reward(self, agent, weight, model=None):
    """
    Sets a reward for the agent that depends on the number of visits to its current location.

    :param agent: the agent for which to set the reward.
    :param weight: the weight of the reward, multiplied by `self.normalize_factor` before being set.
    :param model: the model argument passed on to the agent's `setReward` (default `None`).
    """
    reward_key = rewardKey(agent.name)

    # one branch per location, keyed by its index in self.all_locations;
    # the None branch applies noChangeMatrix to the reward key
    tree_def = {
        'if': equalRow(self.location_feat, self.all_locations),
        None: noChangeMatrix(reward_key),
    }

    # get visitation count according to location
    for idx, location in enumerate(self.all_locations):
        visits_key = get_num_visits_location_key(agent, location)
        if self.inverse:
            # linear combination: +1 * time feature, -1 * visit count of this location
            branch = dynamicsMatrix(reward_key, {self.time_feat: 1., visits_key: -1.})
        else:
            # reward taken directly from the visit count of this location
            branch = setToFeatureMatrix(reward_key, visits_key)
        tree_def[idx] = branch

    scaled_weight = weight * self.normalize_factor
    agent.setReward(makeTree(tree_def), scaled_weight, model)
def set_reward(self, agent, weight, model=None):
    """
    Sets a binary reward for the agent: 1 or 0, chosen by a threshold test (threshold 1) on the
    visit count of its current location.

    :param agent: the agent for which to set the reward.
    :param weight: the weight of the reward, multiplied by `self.normalize_factor` before being set.
    :param model: the model argument passed on to the agent's `setReward` (default `None`).
    """
    reward_key = rewardKey(agent.name)

    # one branch per location, keyed by its index in self.all_locations;
    # the None branch applies noChangeMatrix to the reward key
    tree_def = {
        'if': equalRow(self.location_feat, self.all_locations),
        None: noChangeMatrix(reward_key),
    }

    # get binary value according to visitation of location
    for idx, location in enumerate(self.all_locations):
        visits_key = get_num_visits_location_key(agent, location)
        visit_test = thresholdRow(visits_key, 1)
        reward_one = setToConstantMatrix(reward_key, 1)
        reward_zero = setToConstantMatrix(reward_key, 0)
        tree_def[idx] = {'if': visit_test, True: reward_one, False: reward_zero}

    scaled_weight = weight * self.normalize_factor
    agent.setReward(makeTree(tree_def), scaled_weight, model)
def makeVictimReward(self, agent, model=None, rwd_dict=None):
    """
    Sets the agent's reward as a weighted combination of its 'saved_<color>' state features.
    The weight of each color is taken from `rwd_dict` when present there, otherwise from
    COLOR_REWARDS; colors with no weight or a weight of 0 are left out.

    :param agent: the agent for which to set the reward.
    :param model: the model argument passed on to the agent's `setReward` (default `None`).
    :param dict rwd_dict: optional per-color rewards overriding COLOR_REWARDS (default `None`).
    """
    # collects victims saved of each color
    color_weights = {}
    for color in self.color_names:
        # rwd_dict takes precedence over the default color rewards
        if rwd_dict is not None and color in rwd_dict:
            color_rwd = rwd_dict[color]
        elif color in COLOR_REWARDS:
            color_rwd = COLOR_REWARDS[color]
        else:
            color_rwd = None

        if color_rwd is None or color_rwd == 0:
            continue
        color_weights[stateKey(agent.name, 'saved_' + color)] = color_rwd

    reward_matrix = dynamicsMatrix(rewardKey(agent.name), color_weights)
    agent.setReward(makeTree(reward_matrix), 1., model)
def get_reward_tree(agent, my_side, other_side):
    """
    Builds the reward tree of an agent from the side it chose and the side chosen by the other agent.
    The reward is set to INVALID while either side equals NOT_DECIDED. Otherwise it is set to
    SAME_SIDE_RWD if both sides are equal and to DIFF_SIDES_RWD if they differ.

    :param agent: the agent whose reward key is set by the tree.
    :param my_side: the feature holding the side chosen by the agent.
    :param other_side: the feature holding the side chosen by the other agent.
    :return: the tree produced by `makeTree` for the nested decision structure.
    """
    reward_key = rewardKey(agent.name)
    return makeTree({
        'if': equalRow(my_side, NOT_DECIDED),  # if I have not decided
        True: setToConstantMatrix(reward_key, INVALID),
        False: {
            # the "not decided" test has to compare against the NOT_DECIDED sentinel, as done for
            # my_side; INVALID is the reward value assigned to invalid states, not a side value
            'if': equalRow(other_side, NOT_DECIDED),  # if other has not decided
            True: setToConstantMatrix(reward_key, INVALID),
            False: {
                'if': equalFeatureRow(my_side, other_side),  # if my_side == other_side
                True: setToConstantMatrix(reward_key, SAME_SIDE_RWD),
                False: setToConstantMatrix(reward_key, DIFF_SIDES_RWD)
            }
        }
    })