Example 1
import copy

# RewardMachine and ConstantRewardFunction are assumed to be imported from the
# surrounding project (they are not defined in this snippet).
def compute_rm_from_graph(lm_graph, merge_init_nodes=True):
    """
    Method 1:
    - Each non-init landmark corresponds to an RM (with a terminal state).
    - Each edge in an RM corresponds to the action(s) that need to be taken
      (ideally a single action for the nearest landmark).
    - The RM reflects only the necessary orderings, not the full partial order.

    :param lm_graph: LandmarkGraph
    :param merge_init_nodes: bool
    :return: set of RewardMachine
    """
    if merge_init_nodes:
        lm_graph.merge_init_nodes()

    # For each landmark node that is not the initial state, create an RM for it
    reward_machines = set()
    for n_id, n in lm_graph.nodes.items():
        if not n.in_init():
            # initialize empty RewardMachine
            new_rm = RewardMachine()
            # populate the RewardMachine from bottom up
            openlist = [n]
            while len(openlist) != 0:
                curr_node = openlist.pop(0)
                # add the current landmark as an RM state (keyed by its own id)
                new_rm.add_state_with_landmarks(curr_node.id, copy.copy(curr_node))

                # look at parent landmarks that must be achieved before current landmark,
                for p_id in curr_node.parents:
                    # add a transition from parent to current
                    reward = 0
                    if curr_node == n:
                        reward = 1
                        new_rm.set_terminal_state(curr_node.id)

                    new_rm.add_transition(p_id, curr_node.id, 'TODO',
                                          ConstantRewardFunction(reward))
                    openlist.append(lm_graph.nodes[p_id])

                if len(curr_node.parents) == 0:
                    # this is the initial state
                    new_rm.set_initial_state(curr_node.id)

                if len(curr_node.children) == 0:
                    # this is the terminal state
                    new_rm.set_terminal_state(curr_node.id)

            new_rm.get_txt_representation()
            reward_machines.add(new_rm)

    return reward_machines
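
A minimal usage sketch (not part of the original example): build_landmark_graph and task are hypothetical placeholders for however the surrounding project constructs a LandmarkGraph; only the call to compute_rm_from_graph itself comes from the code above.

# Hypothetical usage; build_landmark_graph and task are placeholders.
lm_graph = build_landmark_graph(task)
reward_machines = compute_rm_from_graph(lm_graph, merge_init_nodes=True)
# one RewardMachine per non-init landmark
print(len(reward_machines))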
Example 2
# RewardMachine, ConstantRewardFunction, get_all_events, and action_to_prop are
# assumed to be imported from the surrounding project (not defined in this snippet).
def rm_net_to_reward_machine(rm_net, world, strict=False):
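    """
    Convert an RM network (a directed graph whose edges carry an action under
    the 'attr' key, e.g. a networkx DiGraph) into a RewardMachine. A node with
    no predecessors becomes the initial state; nodes with no successors become
    terminal states, and transitions into them yield reward 1.
    """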
    rm = RewardMachine()
    node2id = dict()
    for i, node in enumerate(rm_net.nodes()):
        rm.add_state(i)
        node2id[node] = i

    for node in rm_net.nodes():
        # no parent, initial state
        if len(list(rm_net.predecessors(node))) == 0:
            rm.set_initial_state(node2id[node])

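        # Self-loop label: in strict mode it requires that no event occurs at
        # all; in non-strict mode it only excludes the events that trigger
        # outgoing transitions of this node.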
        selfloop = ['!{}'.format(e)
                    for e in get_all_events(world)] if strict else []
        for child in rm_net.successors(node):
            action = rm_net.get_edge_data(node, child)['attr']
            event_prop = action_to_prop(str(action), world)
            if event_prop in selfloop:
                selfloop.pop(selfloop.index(event_prop))
            else:
                if not strict:
                    selfloop.append('!' + str(event_prop))
            reward = 0
            if len(list(rm_net.successors(child))) == 0:
                # child is terminal, get reward 1
                reward = 1
            rm.add_transition(node2id[node], node2id[child], event_prop,
                              ConstantRewardFunction(reward))

        # no outgoing edges: terminal state; otherwise add a self-loop
        if len(list(rm_net.successors(node))) == 0:
            rm.set_terminal_state(node2id[node])
        else:
            rm.add_transition(node2id[node], node2id[node], '&'.join(selfloop),
                              ConstantRewardFunction(0))

    return rm
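
A minimal usage sketch (not part of the original example), assuming rm_net is a networkx DiGraph whose edges carry the action under the 'attr' key (consistent with the get_edge_data(...)['attr'] call above); world is a placeholder for whatever object get_all_events and action_to_prop expect in the surrounding project.

import networkx as nx

# Hypothetical RM network: two steps with placeholder actions on the edges.
rm_net = nx.DiGraph()
rm_net.add_edge('start', 'got_key', attr='pick_up_key')
rm_net.add_edge('got_key', 'opened_door', attr='open_door')

# world is a placeholder for the project's environment object.
rm = rm_net_to_reward_machine(rm_net, world, strict=False)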