Example #1
0
    def step(self, a):
        """Advance one step: sample a transition for action ``a``, then keep
        resolving special tiles (conveyor/arrow tiles, walls) until the agent
        settles on a normal tile.

        Returns ``(state, reward, done, info)`` where ``info["prob"]`` is the
        probability of the initially sampled transition.
        """
        transitions = self.P[self.s][a]
        # Sample a transition index weighted by each tuple's probability.
        i = discrete.categorical_sample([t[0] for t in transitions],
                                        self.np_random)
        p, s, r, d = transitions[i]

        # NOTE(review): both row and col are derived from self.nrow; for a
        # non-square grid one of these would normally use self.ncol — confirm.
        row = s // self.nrow
        col = s - (row * self.nrow)
        letter = self.desc[row, col]

        # Special tiles keep pushing the agent until it lands on a normal
        # tile (or cannot move any further).
        while letter in b'CLDRU':
            last_s = s
            if letter in b'LDRU':
                # Arrow tiles force a specific direction regardless of ``a``.
                a = ACTIONS[letter]
                p, s, r, d = self.P[s][a][0]
            elif letter == b'C':
                p, s, r, d = self.P[s][a][
                    0]  # 'C' makes the agent continue in the direction it last moved in

            row = s // self.nrow
            col = s - (row * self.nrow)
            letter = self.desc[row, col]

            if (
                    last_s == s
            ):  # If we tried moving again but it put us at the same state, we tried to walk off the edge of the map and should stop trying
                break

        if letter == b'W':  # When we hit a wall, we walk into it then step back out
            reverse_a = (a + 2) % 4
            p, s, r, d = self.P[s][reverse_a][0]

        self.s = s
        self.lastaction = a
        return (int(s), r, d, {"prob": p})
Example #2
0
 def reset(self):
     """Sample a start state from the weighted initial distribution.

     ``isd`` holds ``(probability, state)`` pairs; the chosen state is
     stored (and returned) as a tuple.
     """
     self.lastaction = None
     sample_idx = discrete.categorical_sample(
         [weight for weight, _ in self.isd], self.np_random)
     _, chosen_state = self.isd[sample_idx]
     self.s = tuple(chosen_state)
     return self.s
Example #3
0
    def reset(self):
        """Reset the episode: sample a start state, clear the flow
        bookkeeping, and rebuild the per-action link-rate matrix.

        Returns the sampled start state.
        """
        self.s = discrete.categorical_sample(self.isd, self.np_random)
        self.lastaction = None
        self.rm_size = []
        self.flow_time_link = 0
        self.num_flows = 0

        # Fixed seed so every reset regenerates the same rate landscape.
        self.seed(7)

        # Per-action mean utilization: action 0 -> 0.9, 1 -> 0.8, 2 -> 0.6
        # (any further actions fall back to 0.9), each perturbed by uniform
        # noise in [-0.1, 0.1). Previously every row was first drawn at mean
        # 0.9 and rows 1-2 were then redrawn and overwritten, wasting RNG
        # draws and duplicating code.
        action_means = {1: 0.8, 2: 0.6}
        wt = [[0.2 * (np.random.random() - 0.5) + action_means.get(j, 0.9)
               for i in range(self.nS)]
              for j in range(self.nA)]

        # Scale each column by the link's bandwidth cap: rate is nA x nS.
        self.rate = np.matmul(wt, np.diag(self.bandwidth_cap))
        return self.s
 def step(self, a):
     """Sample one transition for action ``a``.

     Returns ``(state, reward, done, info)`` with the sampled transition's
     probability under ``info["prob"]``.
     """
     candidates = self.P[self.s][a]
     weights = [entry[0] for entry in candidates]
     pick = discrete.categorical_sample(weights, self.np_random)
     prob, next_state, reward, done = candidates[pick]
     self.s = next_state
     self.lastaction = a
     return (next_state, reward, done, {"prob": prob})
 def step(self, a):
     """Sample one transition for ``a``; empty transition tuples get zero
     sampling weight. Returns ``(int(state), reward, done, info)``.
     """
     options = self.P[self.s][a]
     weights = [entry[0] if entry else 0 for entry in options]
     pick = discrete.categorical_sample(weights, self.np_random)
     prob, next_state, reward, done = options[pick]
     self.s = next_state
     self.last_action = a
     return int(next_state), reward, done, {"prob": prob}
Example #6
0
    def reset(self):
        """Rebuild the map and transition model, then sample a fresh start
        state and restore the full step budget."""
        self.desc = self._set_start()
        self.p, self.isd = self._create_transition_matrix()

        self.last_action = None
        self.remaining_steps = MAX_STEPS
        self.s = categorical_sample(self.isd, self.np_random)
        return self.s
Example #7
0
 def step(self, a):
     """Sample a transition, merge its probability into the info dict, and
     enforce the episode timeout on top of the sampled ``done`` flag."""
     options = self.P[self.s][a]
     pick = discrete.categorical_sample([o[0] for o in options], self.np_random)
     prob, next_state, reward, done, info = options[pick]
     self.s = next_state
     self.lastaction = a
     info.update({"prob": prob})
     self.nsteps += 1
     # The episode also ends once the step budget is exhausted
     # (np.logical_or keeps the original numpy-bool return type).
     done = np.logical_or(done, self.nsteps > self.timeout)
     return next_state, reward, done, info
 def step(self, a):
     """Validate ``a`` against the current state's action space, then sample
     a transition. ``lastaction`` records the (previous state, action) pair.
     """
     if not self.action_space.spaces[self.s].contains(a):
         raise ValueError(
             f"Action must be < {self.action_space.spaces[self.s].n} in space {self.s}, attempted {a}"
         )
     options = self.P[self.s][a]
     pick = categorical_sample([o[0] for o in options], self.np_random)
     prob, next_state, reward, done = options[pick]
     # Record the pair before overwriting the current state.
     self.lastaction = (self.s, a)
     self.s = next_state
     return (next_state, reward, done, {"prob": prob})
Example #9
0
    def step(self, a):
        """Sample a transition for ``a`` and count down the step budget;
        the episode is forced to end when the budget runs out."""
        options = self.p[self.s][a]
        pick = categorical_sample([o[0] for o in options], self.np_random)
        prob, next_state, reward, done = options[pick]

        self.s = next_state
        self.last_action = a
        self.remaining_steps -= 1
        if self.remaining_steps <= 0:
            # Out of budget: terminate regardless of the sampled flag.
            done = True

        return next_state, reward, done, {"prob": prob}
    def __init__(self, nS, vA, P, isd):
        """Discrete environment where each state has its own action count.

        nS: number of states; vA: per-state action counts; P: transition
        table; isd: initial state distribution.
        """
        self.P = P
        self.isd = isd
        self.lastaction = None # for rendering
        self.nS = nS
        self.vA = np.array(vA)
        assert (self.vA >= 0).all(), "Number of actions per state must be nonnegative."

        self.observation_space = spaces.Discrete(self.nS)
        # One Discrete action space per state, bundled into a Tuple space.
        self.action_space = spaces.Tuple(
            tuple(spaces.Discrete(n) for n in self.vA))

        self.seed()
        self.s = categorical_sample(self.isd, self.np_random)
Example #11
0
    def step(self, a):
        """Take action ``a``: sample a transition, compute the reward via the
        stored reward callable, and offset the returned state id when a goal
        is adjacent.
        """
        transitions = self.P[self.s][a]
        i = categorical_sample([t[0] for t in transitions], self.np_random)
        # Here ``r`` is a reward *function*, not a scalar.
        p, new_s, r, d = transitions[i]

        row, col = self.coordinates(self.s)
        new_row, new_col = self.coordinates(new_s)
        new_letter = self.desc[new_row, new_col]
        # Reward depends on the tile entered and the old/new coordinates.
        _r = r(new_letter, row, col, new_row, new_col)

        self.s = new_s
        self.lastaction = a
        # NOTE(review): ``/`` yields a float in Python 3, so the returned
        # state id becomes a float whenever the offset applies — confirm
        # that ``//`` was not intended.
        new_s = new_s + self.exists_adjacent_goal * self.nS / 2
        return (new_s, _r, d, {"prob": p})
Example #12
0
File: ad_env.py  Project: sna89/gym_ad
    def step(self, action):
        """Advance the anomaly-detection MDP by one action.

        Returns ``(state, reward, done, prob)`` — note the last slot is the
        raw transition probability, not an info dict.
        """
        assert self.action_space.contains(action)
        # Enforce the domain rule that only 'wait' (action 0) is legal while
        # the alert countdown (state component 1) is mid-range.
        # NOTE(review): validation via assert is stripped under ``python -O``;
        # confirm this predicate (boundaries 1 and max_steps_from_alert are
        # exempt from the action==0 requirement) matches the intended rule.
        assert (1 < self.current_state[1] < self.max_steps_from_alert and action == 0) \
               or self.current_state[1] == 1 \
               or self.current_state[1] == self.max_steps_from_alert, \
            'Must choose wait action if alert is triggered'

        self.last_action = action
        transitions = self.P[self.current_state][action]

        # Sample the realized transition weighted by probability.
        i = categorical_sample([t[0] for t in transitions], self.np_random)
        prob, self.current_state, reward, done = transitions[i]

        return self.current_state, reward, done, prob
Example #13
0
    def step(self, a: int):
        """One multi-agent step: decode the joint action, sample each agent's
        movement independently (in agent order), and recombine the results.

        Returns ``(state, reward, done, info)`` where ``info`` carries the
        joint transition probability and a collision flag.
        """
        locations = self.state_to_locations(self.s)
        if self.is_terminal(locations):
            # Terminal states absorb: no movement, zero reward.
            return self.s, 0, True, {"prob": 0}

        # Decode the joint action into one action per agent.
        per_agent_actions = [
            ACTIONS_TO_INT[joint_part]
            for joint_part in integer_action_to_vector(a, self.n_agents)
        ]
        per_agent_states = tuple(self.loc_to_int[loc] for loc in locations)
        # Each entry is a list of (src, dst, prob) candidate movements.
        movements = [
            self.single_agent_movements(per_agent_states[idx],
                                        per_agent_actions[idx])
            for idx in range(self.n_agents)
        ]

        # Sample one outcome per agent, in agent order, accumulating the
        # joint probability of the chosen combination.
        sampled_local_states = []
        joint_prob = 1
        for agent_moves in movements:
            pick = categorical_sample([m[2] for m in agent_moves],
                                      self.np_random)
            sampled_local_states.append(agent_moves[pick][1])
            joint_prob *= agent_moves[pick][2]
        next_local_states = tuple(sampled_local_states)

        # Map local states back to grid locations and re-encode the joint state.
        next_locations = tuple(
            self.valid_locations[local] for local in next_local_states)
        new_state = self.locations_to_state(next_locations)
        reward, done, collision = self.calc_transition_reward_from_local_states(
            per_agent_states, a, next_local_states)

        self.s = new_state
        return new_state, reward, done, {
            "prob": joint_prob,
            "collision": collision
        }
Example #14
0
    def __init__(self):
        """Flow-scheduling environment: nS link states, nA rate classes.

        Builds a seeded random per-action rate matrix and a uniform
        state-transition model.
        """
        self.nS = 20
        self.nA = 3
        self.isd = [1 / self.nS for x in range(self.nS)]
        self.nF = 10          # total flows admitted per episode
        self.rm_size = []     # remaining sizes of in-flight flows

        self.num_flows = 0
        self.flow_time_link = 0
        self.cum_flowtime = 0

        self.lastaction = None
        self.action_space = spaces.Discrete(self.nA)
        self.observation_space = spaces.Discrete(self.nS)

        self.seed(9)
        self.s = discrete.categorical_sample(self.isd, self.np_random)

        # Per-action mean utilization: [0.9, 0.7, 0.5] (lines of MAB and A2C
        # merge in the end), each perturbed by uniform noise in [-0.1, 0.1).
        # Previously every row was drawn at mean 0.9 and rows 1-2 redrawn
        # and overwritten, wasting RNG draws and duplicating code.
        action_means = {1: 0.7, 2: 0.5}
        wt = [[0.2 * (np.random.random() - 0.5) + action_means.get(j, 0.9)
               for i in range(self.nS)]
              for j in range(self.nA)]

        self.bandwidth_cap = [i + 1 for i in range(self.nS)]
        self.rate = np.matmul(wt, np.diag(
            self.bandwidth_cap))  # dimension: nA x nS

        # Uniform transitions: every (prob, next_state) pair equally likely.
        self.P = {s: {a: [(1 / self.nS, next_s) for next_s in range(self.nS)]
                      for a in range(self.nA)}
                  for s in range(self.nS)}
Example #15
0
    def __init__(self, env_map, random_start=False):
        """Grid-world environment built from ``env_map``.

        env_map: base map description; random_start: whether the start
        position is randomized by ``_set_start``.
        """
        self.base_map = env_map
        self.random_start = random_start

        self.desc = self._set_start()
        self.nrow, self.ncol = self.desc.shape
        self.nS = self.nrow * self.ncol
        self.nA = 4

        self.p, self.isd = self._create_transition_matrix()
        self.last_action = None  # for rendering
        self.remaining_steps = MAX_STEPS

        self.observation_space = spaces.Discrete(self.nS)
        self.action_space = spaces.Discrete(self.nA)

        self.seed()
        self.s = categorical_sample(self.isd, self.np_random)
Example #16
0
    def step(self, a):
        """Serve flows on the link for one step using rate class ``a``.

        Admits a new flow while arrivals remain, samples the next link
        state, refreshes the random rate matrix, and advances the flow-time
        bookkeeping. Returns ``(state, reward, done, info)``.
        """
        # Admit a new flow until nF flows have arrived.
        if self.num_flows < self.nF:
            self.newflow_size = self.nS  # Need to read from a list of flow sizes
            self.rm_size.append(self.newflow_size)
            self.num_flows += 1

        transitions = self.P[self.s][a]
        i = discrete.categorical_sample([t[0] for t in transitions],
                                        self.np_random)
        p, newstate = transitions[i]
        self.s = newstate

        # Per-action mean utilization: action 0 -> 0.9, 1 -> 0.8, 2 -> 0.6,
        # each perturbed by uniform noise in [-0.1, 0.1). Previously every
        # row was drawn at mean 0.9 and rows 1-2 redrawn and overwritten,
        # wasting RNG draws and duplicating code.
        action_means = {1: 0.8, 2: 0.6}
        wt = [[0.2 * (np.random.random() - 0.5) + action_means.get(j, 0.9)
               for i in range(self.nS)]
              for j in range(self.nA)]

        self.rate = np.matmul(wt, np.diag(self.bandwidth_cap))
        reward = self.rate[a][self.s]

        # Drain in-flight flows at the current rate, accumulating flow time.
        self.rm_size, self.flow_time_link = self._get_flow_time(
            self.rm_size, self.flow_time_link, self.rate[a][self.s])

        # The episode ends once all admitted flows have fully drained.
        if self.rm_size == [] and self.num_flows >= self.nF:
            done = True
            self.cum_flowtime += self.flow_time_link
        else:
            done = False
        return (newstate, reward, done, {"prob": p})
Example #17
0
 def reset(self):
     """Sample a start state; return it offset by nS/2 when a goal is
     adjacent.

     NOTE(review): ``/`` yields a float in Python 3, so the returned
     observation becomes a float whenever the offset applies — confirm
     that ``//`` was not intended.
     """
     self.s = categorical_sample(self.isd, self.np_random)
     self.start_s = self.s
     self.lastaction = None
     return self.s + self.exists_adjacent_goal * self.nS / 2
Example #18
0
 def reset(self):
     """Draw a start state, then rebuild the grid-reward table and the
     reward/stop-probability tables for the new episode."""
     self.lastaction = None
     self.s = discrete.categorical_sample(self.isd, self.np_random)
     self.reward = self.calc_grid_reward()
     self.P, self.Q = self.calc_reward_stop_probability()
     return self.s
Example #19
0
 def __call__(self, values):
     """Convert ``values`` into probabilities and sample one action."""
     distribution = self.get_probas(values)
     return categorical_sample(distribution, self.np_random)
 def reset(self):
     """Sample a fresh start state and return it as a plain int."""
     self.last_action = None
     self.s = discrete.categorical_sample(self.isd, self.np_random)
     return int(self.s)
 def reset(self):
     """Draw the initial state from ``isd`` and clear the last action."""
     self.lastaction = None
     self.s = categorical_sample(self.isd, self.np_random)
     return self.s