Exemples Python de rewardKey (psychsim.pwl.rewardKey)

Exemple #1

0

Afficher le fichier

def make_single_player_world(player_name,
                             init_loc,
                             loc_neighbors,
                             victims_color_locs,
                             use_unobserved=True,
                             full_obs=False,
                             light_neighbors=None,
                             create_observer=True,
                             logger=logging):
    """
    Builds a search-and-rescue world containing a single triage agent and, optionally, an observer agent.
    :param str player_name: the name given to the triage agent.
    :param init_loc: the initial location handed to `WorldMap.makePlayerLocation`.
    :param loc_neighbors: the location adjacency information handed to `WorldMap`.
    :param victims_color_locs: the victim color/location information handed to `Victims`.
    :param bool use_unobserved: when the world is not fully observable, whether to create the victims'
        observation variables for the triage agent.
    :param bool full_obs: whether the world is fully observable (forwarded to `Victims`).
    :param light_neighbors: the light adjacency information handed to `WorldMap`; a fresh empty dict is used
        when omitted, so that no dict instance is shared between calls.
    :param bool create_observer: whether to create the observer agent.
    :param logger: object exposing a `debug` method (defaults to the `logging` module).
    :return: the tuple `(world, triage_agent, observer, victims, world_map)`; `observer` is `None` when
        `create_observer` is false.
    """
    # avoid a mutable default argument shared across calls
    if light_neighbors is None:
        light_neighbors = {}

    # create world and map
    world = SearchAndRescueWorld()
    world_map = WorldMap(world, loc_neighbors, light_neighbors)

    # create victims info
    victims = Victims(world,
                      victims_color_locs,
                      world_map,
                      full_obs=full_obs,
                      color_prior_p=COLOR_PRIOR_P,
                      color_fov_p=COLOR_FOV_P,
                      color_reqd_times=COLOR_REQD_TIMES)

    # create (single) triage agent
    triage_agent = world.addAgent(player_name)

    world_map.makePlayerLocation(triage_agent, init_loc)
    victims.setupTriager(triage_agent)
    world_map.makeMoveResetFOV(triage_agent)
    victims.createTriageActions(triage_agent)
    if not full_obs:
        if use_unobserved:
            logger.debug('Start to make observable variables and priors')
            victims.createObsVars4Victims(triage_agent)
        logger.debug('Made observable variables and priors')
    victims.makeSearchAction(triage_agent)
    logger.debug('Made actions for triage agent: {}'.format(triage_agent.name))
    triage_agent.setReward(
        makeTree(setToConstantMatrix(rewardKey(triage_agent.name),
                                     0)))  # dummy reward

    # after all agents are created
    victims.makeExpiryDynamics()
    victims.stochasticTriageDur()

    world.setOrder([{triage_agent.name}])

    # observer agent
    observer = make_observer(world, [triage_agent.name],
                             OBSERVER_NAME) if create_observer else None

    # adjust agent's beliefs and observations
    triage_agent.resetBelief()

    # keys the triage agent never observes: the observer's model key and its own reward key
    # (computed once instead of once per state key)
    observer_name = observer.name if observer is not None else ''
    hidden_keys = {modelKey(observer_name), rewardKey(triage_agent.name)}
    triage_agent.omega = [
        key for key in world.state.keys()
        if key not in hidden_keys and key.find('unobs') < 0
    ]

    return world, triage_agent, observer, victims, world_map

Exemple #2

0

Afficher le fichier

def get_reward_tree(agent, my_dec, other_dec):
    """
    Builds the reward tree of an agent from its own decision and the other agent's decision.
    The reward is INVALID while either side is NOT_DECIDED; otherwise it is one of MUTUAL_COOP, SUCKER,
    TEMPTATION or PUNISHMENT, depending on which of the two decisions equal COOPERATED.
    :param Agent agent: the agent whose reward key is set by the tree.
    :param my_dec: the feature holding this agent's decision.
    :param other_dec: the feature holding the other agent's decision.
    :return: the tree produced by `makeTree` for the nested decision structure.
    """
    reward_key = rewardKey(agent.name)

    def payoff(value):
        # leaf setting this agent's reward to a fixed value
        return setToConstantMatrix(reward_key, value)

    # I cooperated: outcome depends on whether the other cooperated too
    after_my_coop = {
        'if': equalRow(other_dec, COOPERATED),
        True: payoff(MUTUAL_COOP),  # both cooperated
        False: payoff(SUCKER),
    }

    # I defected: outcome depends on whether the other cooperated
    after_my_defect = {
        'if': equalRow(other_dec, COOPERATED),
        True: payoff(TEMPTATION),
        False: payoff(PUNISHMENT),
    }

    # both sides have decided: branch on my own decision
    both_decided = {
        'if': equalRow(my_dec, COOPERATED),
        True: after_my_coop,
        False: after_my_defect,
    }

    # I have decided: check whether the other has
    i_decided = {
        'if': equalRow(other_dec, NOT_DECIDED),
        True: payoff(INVALID),
        False: both_decided,
    }

    # root: check whether I have decided at all
    root = {
        'if': equalRow(my_dec, NOT_DECIDED),
        True: payoff(INVALID),
        False: i_decided,
    }
    return makeTree(root)

Exemple #3

0

Afficher le fichier

Fichier : helper_functions.py Projet : usc-psychsim/psychsim

def set_constant_reward(agent, value):
    """
    Gets a matrix that sets the reward of the given agent to a constant value.
    :param Agent agent: the agent whose reward is to be set.
    :param float value: the constant value to assign to the reward.
    :rtype: KeyedMatrix
    :return: a matrix that sets the agent's reward key to the given constant value.
    """
    agent_reward_key = rewardKey(agent.name)
    return setToConstantMatrix(agent_reward_key, value)

Exemple #4

0

Afficher le fichier

Fichier : dynamics.py Projet : usc-psychsim/rddl2psychsim

 def _convert_reward_function(self):
     """
     Sets the reward of every agent in the world from the model's domain reward expression.
     The expression is converted once per agent, turned into a dynamics tree over that agent's
     reward key and registered on the agent with weight 1.
     """
     # create reward function # TODO assume homogeneous agents
     logging.info('__________________________________________________')
     for ag in self.world.agents.values():
         reward_expr = self._convert_expression(self.model.domain.reward,
                                                dependencies=set())
         reward_key = rewardKey(ag.name)
         reward_tree = self._get_dynamics_tree(reward_key, reward_expr)
         ag.setReward(reward_tree, 1.)
         logging.info(f'Set agent "{ag.name}" reward to:\n{reward_tree}')

Exemple #5

0

Afficher le fichier

Fichier : helper_functions.py Projet : usc-psychsim/psychsim

def multi_reward_matrix(agent, scaled_keys):
    """
    Gets a matrix that sets the given agent's reward to a linear combination of keys.
    The combination itself is delegated to `multi_set_matrix`, applied to the agent's reward key: each key in
    scaled_keys is scaled by its weight and summed, with an offset added if CONSTANT is in scaled_keys.
    :param Agent agent: the agent whose reward is to be set.
    :param dict scaled_keys: the dictionary containing the weights (scalars) for each named key.
    :rtype: KeyedMatrix
    :return: a matrix performing the given linear combination on the agent's reward key.
    """
    agent_reward_key = rewardKey(agent.name)
    return multi_set_matrix(agent_reward_key, scaled_keys)

Exemple #6

0

Afficher le fichier

Fichier : rewards.py Projet : ualiangzhang/atomic

    def set_reward(self, agent, weight, model=None):
        """
        Sets a location-visitation-count reward on the given agent.
        For each known location, the reward is either the location's visit-count feature or, when
        `self.inverse` is set, the time feature minus that visit count.
        :param Agent agent: the agent receiving the reward.
        :param float weight: the reward weight, multiplied by `self.normalize_factor` before being set.
        :param model: the agent model forwarded to `agent.setReward`.
        """
        reward_key = rewardKey(agent.name)

        # branch on the agent's current location; the `None` branch leaves the reward unchanged
        tree = {'if': equalRow(self.location_feat, self.all_locations),
                None: noChangeMatrix(reward_key)}

        # one leaf per location, indexed by the location's position in the list
        for index, location in enumerate(self.all_locations):
            visits_key = get_num_visits_location_key(agent, location)
            if self.inverse:
                leaf = dynamicsMatrix(reward_key, {self.time_feat: 1., visits_key: -1.})
            else:
                leaf = setToFeatureMatrix(reward_key, visits_key)
            tree[index] = leaf

        reward_tree = makeTree(tree)
        agent.setReward(reward_tree, weight * self.normalize_factor, model)

Exemple #7

0

Afficher le fichier

Fichier : rewards.py Projet : ualiangzhang/atomic

    def set_reward(self, agent, weight, model=None):
        """
        Sets a binary location-visitation reward on the given agent.
        For each known location, the reward is set to 1 or 0 according to a `thresholdRow` test of the
        location's visit-count feature against 1.
        :param Agent agent: the agent receiving the reward.
        :param float weight: the reward weight, multiplied by `self.normalize_factor` before being set.
        :param model: the agent model forwarded to `agent.setReward`.
        """
        reward_key = rewardKey(agent.name)

        # branch on the agent's current location; the `None` branch leaves the reward unchanged
        tree = {'if': equalRow(self.location_feat, self.all_locations),
                None: noChangeMatrix(reward_key)}

        # one binary sub-tree per location, indexed by the location's position in the list
        for index, location in enumerate(self.all_locations):
            visits_key = get_num_visits_location_key(agent, location)
            visited_subtree = {
                'if': thresholdRow(visits_key, 1),
                True: setToConstantMatrix(reward_key, 1),
                False: setToConstantMatrix(reward_key, 0),
            }
            tree[index] = visited_subtree

        reward_tree = makeTree(tree)
        agent.setReward(reward_tree, weight * self.normalize_factor, model)

Exemple #8

0

Afficher le fichier

Fichier : victims.py Projet : usc-psychsim/atomic

    def makeVictimReward(self, agent, model=None, rwd_dict=None):
        """
        Sets the agent's reward as a weighted sum of its per-color 'saved_<color>' state features.
        The weight of each color is taken from rwd_dict when it provides one, otherwise from COLOR_REWARDS;
        colors with no weight or a zero weight are left out.
        :param Agent agent: the agent receiving the reward.
        :param model: the agent model forwarded to `agent.setReward`.
        :param dict rwd_dict: optional per-color reward values overriding COLOR_REWARDS.
        """

        # collects victims saved of each color
        saved_weights = {}
        for color in self.color_names:
            # explicit overrides take precedence over the default color rewards
            if rwd_dict is not None and color in rwd_dict:
                color_rwd = rwd_dict[color]
            elif color in COLOR_REWARDS:
                color_rwd = COLOR_REWARDS[color]
            else:
                color_rwd = None

            if color_rwd is None or color_rwd == 0:
                continue
            saved_weights[stateKey(agent.name, 'saved_' + color)] = color_rwd

        reward_matrix = dynamicsMatrix(rewardKey(agent.name), saved_weights)
        agent.setReward(makeTree(reward_matrix), 1., model)

Exemple #9

0

Afficher le fichier

def get_reward_tree(agent, my_side, other_side):
    """
    Builds the reward tree of an agent from the side it chose and the side chosen by the other agent.
    The reward is INVALID while either agent is NOT_DECIDED; otherwise it is SAME_SIDE_RWD when both sides
    are equal and DIFF_SIDES_RWD when they differ.
    :param Agent agent: the agent whose reward key is set by the tree.
    :param my_side: the feature holding this agent's chosen side.
    :param other_side: the feature holding the other agent's chosen side.
    :return: the tree produced by `makeTree` for the nested decision structure.
    """
    reward_key = rewardKey(agent.name)
    return makeTree({
        'if': equalRow(my_side, NOT_DECIDED),  # if I have not decided
        True: setToConstantMatrix(reward_key, INVALID),
        False: {
            # the "not decided" test must compare against NOT_DECIDED (as done for my_side above);
            # INVALID is the reward value assigned in that case, not a value of the side feature
            'if': equalRow(other_side, NOT_DECIDED),  # if other has not decided
            True: setToConstantMatrix(reward_key, INVALID),
            False: {
                'if': equalFeatureRow(my_side,
                                      other_side),  # if my_side == other_side
                True: setToConstantMatrix(reward_key, SAME_SIDE_RWD),
                False: setToConstantMatrix(reward_key, DIFF_SIDES_RWD)
            }
        }
    })