Esempio n. 1
0
    def parse(self):
        """Collect the location and branchpoint keys found in ``self.file``.

        The XML document is loaded with ``ET.parse``. A ``State`` is built
        for the ``find('id')`` result of every ``location`` element and an
        ``Action`` for that of every ``branchpoint`` element.

        Returns:
            A ``(statelist, actionlist)`` tuple with the keys under which
            the states and the actions were registered, in first-seen order.
        """
        root = ET.parse(self.file).getroot()

        # NOTE(review): ``find('id')`` returns a child *element* (or None
        # when there is no <id> child), not an attribute value -- confirm
        # this matches the schema of the input file.
        statedict = {
            key: State(key)
            for key in (loc.find('id') for loc in root.iter('location'))
        }
        actiondict = {
            key: Action(key)
            for key in (bp.find('id') for bp in root.iter('branchpoint'))
        }

        # Transitions are visited but not processed yet.
        for _ in root.iter('transition'):
            pass

        return list(statedict), list(actiondict)
Esempio n. 2
0
from MDP import State, Action, Transition
import ValueIter

# NOTE(review): y and e are presumably the discount factor and the
# convergence threshold consumed by ValueIter -- confirm against its API.
y = 0.2
e = 0.01
reward = 15

# Add states: three plain states plus "R", which also receives `reward`
# and a trailing 1 as constructor arguments.
states = {label: State(label) for label in ("A", "B", "C")}
states["R"] = State("R", reward, 1)

# Add actions: (state, action names offered in that state).
for _owner, _moves in (
    ("A", ("rt", "up")),
    ("B", ("lt", "up")),
    ("C", ("rt", "dn")),
):
    for _move in _moves:
        states[_owner].add_action(Action(_owner, _move))

# Add transitions for A and B: (state, action, target, probability).
# Every transition is registered with -1 as its third argument.
for _src, _move, _dst, _prob in (
    ("A", "rt", "B", 0.8),
    ("A", "rt", "A", 0.2),
    ("A", "up", "C", 0.8),
    ("A", "up", "A", 0.2),
    ("B", "lt", "A", 0.8),
    ("B", "lt", "B", 0.2),
):
    states[_src].actions[_move].add_transition(_dst, _prob, -1)
Esempio n. 3
0
                    ref[state] = idx 
                    idx += 1 

# Add actions: register the Action objects available in every state.
# Walks every combination of POSITIONS x MATERIALS x ARROWS x MM x HEALTH,
# looks the combination up in `ref` and uses the result to index `states`.
# NOTE(review): POSITIONS, MATERIALS, ARROWS, MM, HEALTH, ref and states are
# defined outside this excerpt; their contents are not visible here.
for p in POSITIONS:
    for m in MATERIALS:
        for a in ARROWS:
            for mm in MM:
                for h in HEALTH:

                    # The full tuple is the lookup key for this state.
                    cur = (p, m, a, mm, h)
                    now = ref[cur] 

                    # h == 0: only the "NONE" action is registered, then the
                    # remaining actions are skipped for this state.
                    if h == 0:
                        states[now].add_action(Action(now, "NONE"))
                        continue 

                    # "STAY" is registered for every state with h != 0.
                    states[now].add_action(Action(now, "STAY"))

                    if p == "W":
                        states[now].add_action(Action(now, "RIGHT"))
                        # "SHOOT" is only offered when a > 0
                        # (presumably at least one arrow left -- confirm).
                        if a > 0:
                            states[now].add_action(Action(now, "SHOOT"))

                    elif p == "E":
                        states[now].add_action(Action(now, "LEFT"))

                        # "HIT" is always offered at "E"; "SHOOT" again
                        # requires a > 0.
                        states[now].add_action(Action(now, "HIT"))
                        if a > 0:
                            states[now].add_action(Action(now, "SHOOT"))
Esempio n. 4
0
                    ref[idx] = state 
                    idx += 1 

# Add actions: register the Action objects available in every state.
# Walks every combination of POSITIONS x MATERIALS x ARROWS x MM x HEALTH;
# combinations with h == 0 receive no actions in this loop.
# NOTE(review): POSITIONS, MATERIALS, ARROWS, MM, HEALTH, ref and states are
# defined outside this excerpt; their contents are not visible here.
for p in POSITIONS:
    for m in MATERIALS:
        for a in ARROWS:
            for mm in MM:
                for h in HEALTH:
                    # Skip h == 0 before any lookup is made.
                    if h == 0:
                        continue 

                    # NOTE(review): the statement just above this section
                    # stores `ref[idx] = state`, yet here `ref` is indexed by
                    # the state tuple -- confirm `ref` supports this lookup.
                    cur = (p, m, a, mm, h)
                    now = ref[cur] 
                    # "STAY" is registered for every state that reaches here.
                    states[now].add_action(Action(now, "STAY"))

                    if p == "W":
                        states[now].add_action(Action(now, "RIGHT"))
                        # "SHOOT" is only offered when a > 0
                        # (presumably at least one arrow left -- confirm).
                        if a > 0:
                            states[now].add_action(Action(now, "SHOOT"))

                    elif p == "E":
                        states[now].add_action(Action(now, "LEFT"))

                        # "HIT" is always offered at "E"; "SHOOT" again
                        # requires a > 0.
                        states[now].add_action(Action(now, "HIT"))
                        if a > 0:
                            states[now].add_action(Action(now, "SHOOT"))

                    elif p == "S":
                        states[now].add_action(Action(now, "UP"))
Esempio n. 5
0
    # ------------

    # ---- States ----
    # Three states identified as 1, 2 and 3.
    # NOTE(review): the second argument is presumably the state's reward or
    # initial value -- confirm against the State constructor.
    states_list = [State(1, 5.), State(2, 10), State(3, -7.2)]

    # ---- MDP ----
    # NOTE(review): disc_fact is presumably the discount factor -- confirm
    # against the MDP constructor.
    disc_fact = 0.5
    chain = MDP(states_list, disc_fact)

    # ---- Actions ----
    # Each action is built from a name, the MDP, a 3x3 array and a
    # 3-element array. Every row of the 3x3 arrays sums to 1 (up to
    # floating-point rounding).
    # NOTE(review): row i is presumably the next-state distribution from
    # state i, and the 3-element array one reward per state -- confirm
    # against the Action constructor.

    # ---- Action "a"
    transitions_a = np.array([[0.3, 0.6, 0.1], [0.1, 0.6, 0.3],
                              [0.5, 0.0, 0.5]])
    reward_a = np.array([5.0, 5.0, 1.0])
    a = Action("a", chain, transitions_a, reward_a)
    # ----

    # ---- Action "b"
    transitions_b = np.array([[0.0, 0.3, 0.7], [1 / 3, 1 / 3, 1 / 3],
                              [0.0, 0.5, 0.5]])
    reward_b = np.array([2.8, 0.0, 10.0])
    b = Action("b", chain, transitions_b, reward_b)
    # ----

    # ---- Action "c"
    transitions_c = np.array([[0.2, 0.4, 0.4], [0.2, 0.6, 0.2],
                              [1 / 3, 1 / 3, 1 / 3]])
    reward_c = np.array([-7.2, -7.2, 0.0])
    c = Action("c", chain, transitions_c, reward_c)
    # ----