def compute_rm_from_graph(lm_graph, merge_init_nodes=True):
    """
    Method 1
    - Each non-init landmark corresponds to RM (with terminal state)
    - Edge in each RM corresponds to actions needed to take (ideally only one action for nearest landmark)
    - RM only reflects the necessary orderings, not partially-ordered

    :param lm_graph: LandmarkGraph
    :param merge_init_nodes: bool
    :return: set of RewardMachine
    """
    if merge_init_nodes:
        lm_graph.merge_init_nodes()

    # For each landmark node that is not the initial state, create a RM for it
    reward_machines = set()
    for n_id, n in lm_graph.nodes.items():
        if n.in_init():
            continue
        # initialize empty RewardMachine
        new_rm = RewardMachine()
        # populate the RewardMachine from bottom up (from the target
        # landmark back through its required predecessors)
        openlist = [n]
        # guard against re-processing nodes reachable via multiple paths,
        # which would duplicate states/transitions in the RM
        visited = set()
        while openlist:
            curr_node = openlist.pop(0)
            if curr_node.id in visited:
                continue
            visited.add(curr_node.id)
            # add current state under its OWN id (not the terminal's id)
            new_rm.add_state_with_landmarks(curr_node.id, copy.copy(curr_node))
            # look at parent landmarks that must be achieved before current landmark
            for p_id in curr_node.parents:
                # add a transition from parent to current; reaching the
                # target landmark `n` yields reward 1, everything else 0
                reward = 0
                if curr_node == n:
                    reward = 1
                    new_rm.set_terminal_state(curr_node.id)
                new_rm.add_transition(p_id, curr_node.id, 'TODO', ConstantRewardFunction(reward))
                openlist.append(lm_graph.nodes[p_id])
            if len(curr_node.parents) == 0:
                # no predecessors: this is the initial state
                new_rm.set_initial_state(curr_node.id)
            if len(curr_node.children) == 0:
                # no successors: this is the terminal state
                new_rm.set_terminal_state(curr_node.id)
        new_rm.get_txt_representation()
        reward_machines.add(new_rm)
    return reward_machines
def rm_net_to_reward_machine(rm_net, world, strict=False):
    """
    Convert a networkx-style RM network into a RewardMachine.

    :param rm_net: directed graph whose nodes are RM states and whose edges
                   carry an 'attr' action label
    :param world: world object used to map actions/events to propositions
    :param strict: if True, the self-loop on each non-terminal state requires
                   ALL world events to be absent (minus those consumed by
                   outgoing edges); if False, only the outgoing-edge events
                   are negated in the self-loop
    :return: RewardMachine equivalent to rm_net
    """
    rm = RewardMachine()
    node2id = dict()
    for i, node in enumerate(rm_net.nodes()):
        rm.add_state(i)
        node2id[node] = i

    for node in rm_net.nodes():
        # no parent, initial state
        if len(list(rm_net.predecessors(node))) == 0:
            rm.set_initial_state(node2id[node])

        # self-loop condition: in strict mode start by negating every world
        # event, then drop the negations of events that actually fire edges
        selfloop = ['!{}'.format(e) for e in get_all_events(world)] if strict else []
        for child in rm_net.successors(node):
            action = rm_net.get_edge_data(node, child)['attr']
            event_prop = action_to_prop(str(action), world)
            # BUGFIX: selfloop holds NEGATED literals ('!e'), so the
            # membership test must negate event_prop before comparing;
            # the original compared the bare prop and never matched.
            negated = '!' + str(event_prop)
            if negated in selfloop:
                selfloop.remove(negated)
            elif not strict:
                selfloop.append(negated)
            reward = 0
            if len(list(rm_net.successors(child))) == 0:
                # child is terminal, get reward 1
                reward = 1
            rm.add_transition(node2id[node], node2id[child], event_prop, ConstantRewardFunction(reward))

        # add self loop
        if len(list(rm_net.successors(node))) == 0:
            # no children, terminal state
            rm.set_terminal_state(node2id[node])
        else:
            rm.add_transition(node2id[node], node2id[node], '&'.join(selfloop), ConstantRewardFunction(0))
    return rm