    def __init__(self, env, max_layer):
        # env: reference environment; max_layer: number of modifications to search for
        self.env = env
        self.modifications = []
        self.counter = 0
        self.nodes = []

        # Pre-train an agent on the reference environment and rank cells by visit frequency
        agent = w_QAgent(env)
        agent.qlearn(3000, render=False)
        cell_dict = cell_frequency(agent)

        # For every visited cell, allow two candidate modifications:
        # type 0 turns it into a jump cell, type 1 turns it into a special cell
        for element in cell_dict:
            if element[1] != 0:
                self.modifications.append((0, element[0][0], element[0][1]))
                self.modifications.append((1, element[0][0], element[0][1]))

        self.modifications.sort()

        self.num_nodes = 0
        self.root = None
        self.max_layer = max_layer
        self.threshold = 10.75  # reward threshold above which a rollout's modifications are expanded into the tree

        # Storing best reward and corresponding environment
        self.max_reward = float("-inf")
        self.opt_env = None
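
A minimal usage sketch for the tree built by this constructor, assuming the initialize() and ucb_search() methods that the later examples call on Tree; the iteration count is illustrative rather than taken from the original code:

def tree_usage_sketch():
    # Hypothetical helper, not part of the original file
    env = WindyGridworld()              # reference environment
    tree = Tree(env, max_layer=2)       # search for two modifications
    tree.initialize()
    tree.ucb_search(iterations=100)     # illustrative iteration count
    return tree.best_observed_choice()  # (modification vector, best reward)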
Example #2
def potency(mutual_ls, agent, modified, num_episodes, index):
    # This function tests the potency of the connected q-learning paradigm.
    # Parameters
    # ==============================================
    # mutual_ls: output list; this run's score is written to mutual_ls[index]
    # agent: pre-trained agent in some environment
    # modified: new environment
    # num_episodes: number of episodes trained in connected q-learning paradigm
    # index: position in mutual_ls reserved for this run
    # ==============================================

    series = agent.env.resettable_states()

    conn_agent = connected_qlearn(agent, modified, num_episodes)
    l_vals = []

    for state in series:
        res = conn_agent.eval(fixed=state, show=False)[1]
        l_vals.append(res)

    new_agent = w_QAgent(modified)
    new_agent.qlearn(3500, show=False, render=False)
    n_vals = []

    for state in series:
        res = new_agent.eval(fixed=state, show=False)[1]
        n_vals.append(res)

    l_vals = np.array(l_vals)
    n_vals = np.array(n_vals)

    # Absolute gap between the connected agent's total evaluation score and the from-scratch agent's
    a = abs(np.sum(l_vals) - np.sum(n_vals))

    mutual_ls[index] = a
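
Since potency writes its score into mutual_ls[index], it appears designed to run in parallel worker processes; the sketch below is a hedged illustration of such a driver, where base_agent, candidate_envs, and the episode count are hypothetical and not from the original code:

import multiprocessing as mp
from multiprocessing import Manager

def potency_driver_sketch(base_agent, candidate_envs):
    # Hypothetical driver, not part of the original file:
    # one worker per modified environment, each writing its score
    # into the shared list at its own index.
    manager = Manager()
    scores = manager.list([0] * len(candidate_envs))
    workers = []
    for i, modified in enumerate(candidate_envs):
        p = mp.Process(target=potency,
                       args=(scores, base_agent, modified, 1500, i))
        p.start()
        workers.append(p)
    for p in workers:
        p.join()
    return list(scores)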
    def default_policy(self, node_index):
        start = node_index
        simulate_env = copy.deepcopy(self.nodes[start].env)
        num_modifications_applied = len(simulate_env.jump_cells) + len(
            simulate_env.special) - len(self.env.special) - len(
                self.env.jump_cells)
        mods_left = self.max_layer - num_modifications_applied

        # Choose from unused modifications, starting from the start node.
        # We know that tree.nodes[start] is a leaf, so no modifications have been used at start yet.
        ls = []
        for i in range(self.nodes[start].modification + 1,
                       len(self.modifications)):
            ls.append(i)

        try:
            a = random.sample(ls, k=mods_left)

        except ValueError:
            # random.sample raises ValueError when mods_left exceeds the number of unused modifications
            print(ls)
            print(num_modifications_applied)
            raise

        a = sorted(a)
        for element in a:
            mod = self.modifications[element]
            if mod[0] == 0:
                simulate_env.jump_cells.append((mod[1], mod[2]))
            elif mod[0] == 1:
                simulate_env.special.append((mod[1], mod[2]))

        # Training
        agent = w_QAgent(simulate_env)
        agent.qlearn(3000, show=False)
        reward = utility(agent)

        if reward > self.threshold:
            # Promising rollout: report it and expand its modifications into the tree
            print(colored(a, "red"))
            print(colored(reward, "red"))
            for element in a:
                start = self.add_node(element, start).index

            # Update tree's max reward if possible
            if reward > self.max_reward:
                self.max_reward = reward
                self.opt_env = simulate_env

            return [self.scale(reward), start]

        return self.scale(reward)


def connected_qlearn(agent, new_env, num_episodes):
    # Parameters
    # ==============================================
    # agent: pre-trained agent in some environment
    # new_env: new environment
    # num_episodes: number of episodes to train in the new environment
    # ==============================================

    # We reuse the pre-trained agent's q-values to train in the new environment.
    # The intuition is that the q-values should only need slight changes, so recomputing them from scratch would be computationally wasteful.

    linked_agent = w_QAgent(new_env)
    linked_agent.q = copy.deepcopy(agent.q)  # linking the q-values together

    linked_agent.epsilon = 0.75
    linked_agent.qlearn(num_episodes, render=False)

    return linked_agent
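
A short hedged sketch of the warm-start idea, reusing the make_env helper and the (type, row, col) modification format from elsewhere in this file; the specific modifications and episode counts are illustrative:

def connected_qlearn_sketch():
    # Hypothetical example, not part of the original file
    base_env = WindyGridworld()
    base_agent = w_QAgent(base_env)
    base_agent.qlearn(3000, render=False)

    # (type, row, col): type 0 adds a jump cell, type 1 adds a special cell
    mods = [(0, 2, 3), (1, 4, 1)]
    new_env = make_env(base_env, mods)

    # Fine-tune the pre-trained q-values instead of learning from scratch
    return connected_qlearn(base_agent, new_env, num_episodes=1500)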
Example #5
    def best_observed_choice(self):
        vector = []
        for jump in self.opt_env.jump_cells:
            if jump not in self.env.jump_cells:
                tup = (0, jump[0], jump[1])
                vector.append(tup)

        for cell in self.opt_env.special:
            if cell not in self.env.special:
                tup = (1, cell[0], cell[1])
                vector.append(tup)

        # Retrain from scratch to avoid errors carried over from connected training
        agent = w_QAgent(self.opt_env)
        agent.qlearn(3500)
        rews = utility(agent)

        x = max(rews, self.max_reward)

        return (vector, x)
Example #6
    def greedy(self):
        walk = []
        start = 0
        while self.nodes[start].layer < self.max_layer:
            if len(self.nodes[start].visited_children) != 0:
                start = self.best_child(start, 0, 0, expanded=False)
                mod_index = self.nodes[start].modification
                walk.append(self.modifications[mod_index])
            else:
                # No expanded child to descend into; stop the walk early
                break

        if len(walk) < self.max_layer:
            print("MCTS insufficient to get {} modifications!".format(
                self.max_layer))
            return (walk, None)

        else:
            modified = make_env(self.env, walk)
            agent = w_QAgent(modified)
            agent.qlearn(3000, render=False)
            rews = utility(agent)
            return (walk, rews)
Example #7
    def default_policy(self, node_index):
        start = node_index
        simulate_env = copy.deepcopy(self.nodes[start].env)
        num_modifications_applied = len(simulate_env.jump_cells) + len(
            simulate_env.special) - len(self.env.special) - len(
                self.env.jump_cells)
        mods_left = self.max_layer - num_modifications_applied

        # Choose from unused modifications, starting from the start node.
        # We know that tree.nodes[start] is a leaf, so no modifications have been used at start yet.
        ls = []

        if node_index != 0:
            for i in range(self.nodes[start].modification + 1,
                           len(self.modifications)):
                ls.append(i)

        else:
            ls = [i for i in range(len(self.modifications))]

        a = random.sample(ls, k=mods_left)
        a = sorted(a)
        for element in a:
            mod = self.modifications[element]

            if mod[0] == 0:
                simulate_env.jump_cells.append((mod[1], mod[2]))
            elif mod[0] == 1:
                simulate_env.special.append((mod[1], mod[2]))

        # Training
        agent = w_QAgent(simulate_env)
        agent.qlearn(3000, show=False)
        reward = utility(agent)

        if reward > self.max_reward:
            self.max_reward = reward
            self.opt_env = copy.deepcopy(simulate_env)

        return reward
Example #8
    else:
        chosen_vectors = ls[0:(rounds * num_processes)]

    for iter in range(rounds):
        print(colored("Data addition round {} begins!".format(iter), "red"))
        for i in range(num_processes):
            if i + iter * num_processes >= len(chosen_vectors):
                break

            # results = simulate_env(env, num_mods)
            v = chosen_vectors[i + iter * num_processes]
            # modified = results[0]
            modified = make_env(env, v)
            # categories.append(results[1])
            categories.append(v)
            agent = w_QAgent(modified)
            p = mp.Process(target=qlearn_as_func,
                           args=(agent, modified, i, agents,
                                 i + iter * num_processes))
            p.start()
            processes.append(p)

        for process in processes:
            process.join()

        for process in processes:
            process.terminate()

    for i in range(len(agents)):
        if agents[i] != 0:
            ut = utility(agents[i])
Example #9
            ref_env.jump_cells.append((element[1], element[2]))
        else:
            ref_env.special.append((element[1], element[2]))
    
    return ref_env


def cell_frequency(agent):
    # Count how often each cell is visited across evaluations from every resettable start state
    dict_return = {}
    for row in range(agent.env.width):
        for col in range(agent.env.length):
            dict_return[(row, col)] = 0

    ls = agent.env.resettable_states()
    for i in range(len(ls)):
        states = agent.eval(show=False, fixed=ls[i])[2]
        for state in states:
            dict_return[(state[0], state[1])] += 1

    # Return ((row, col), count) pairs sorted by descending visit count
    dict_return = sorted(dict_return.items(), key=lambda x: -x[1])
    return dict_return


if __name__ == "__main__":
    agent = w_QAgent(env)
    agent.qlearn(3000, render=False)
    cell_dict = cell_frequency(agent)
    # Report cells the pre-trained agent never visits
    for elem in cell_dict:
        if elem[1] == 0:
            print(elem[0])
    rounds = 300
    mp.set_start_method("spawn")
    num_processes = 10
    processes = []
    manager = Manager()
    agents = manager.list()
    for i in range(rounds * num_processes):
        agents.append(0)  # placeholder; replaced by a trained agent once its worker finishes

    categories = []
    num_mods = 4

    # `map` is assumed to be a grid-layout definition from elsewhere in this file, not the builtin
    map_to_numpy = np.asarray(map, dtype="c")
    env = WindyGridworld()  # reference environment

    orig_agent = w_QAgent(env)
    orig_agent.qlearn(3000, render=False)
    cell_dict = cell_frequency(orig_agent)
    modifications = []

    for element in cell_dict:
        if element[1] != 0:
            modifications.append((0, element[0][0], element[0][1]))
            modifications.append((1, element[0][0], element[0][1]))

    modifications.sort()
    ls = None

    if num_mods == 1:
        ls = [[elem] for elem in modifications]
Example #11
    ls[i] = np.reshape(ls[i], (num_mods * 3))

ls = np.array(ls)
vector = model.predict(ls)

# Keep track of max and corresponding environment
s = vector.shape
a = np.reshape(vector, (s[0] * s[1]))
index = np.argmax(a)

# Vector at the index of the highest prediction, reshaped back into (type, row, col) triples
corr_vec = ls[index]
coor_vec = list(np.reshape(corr_vec, (len(corr_vec) // 3, 3)))

res_env = make_env(env, coor_vec)
agent = w_QAgent(res_env)
agent.qlearn(3000, render=False)
opt_val = utility(agent)

# Re-format found vector
x = copy.deepcopy(coor_vec)
for i in range(len(x)):
    x[i] = tuple(x[i])


r_dir = os.path.abspath(os.pardir)
data_dir = os.path.join(r_dir, "data-wgr")
file_dir = os.path.join(data_dir, "sl_nh_result_{}.txt".format(num_mods))
with open(file_dir, "w") as file:
    file.write("Modifications: ")
    file.write(str(x))


def batch_greedy(env, num_mods, num_mods_per_run, ls_num_iters):
    # Parameters
    # =========================================================
    # env: original environment
    # num_mods: total number of modifications
    # num_mods_per_run: number of modifications searched for jointly in each MCTS run
    # ls_num_iters: list of MCTS iteration counts, one entry per possible number of modifications in a run
    # =========================================================

    # Example: [50, 200] means that a run searching for 1 modification uses 50 iterations, and a run searching for 2 uses 200 iterations.

    ref = copy.deepcopy(env)
    mods_ret = []  # answer of this algorithm

    # Initialize an MCTS tree
    tree = Tree(env, max_layer=num_mods)
    tree.initialize()

    # Keep a running count of modifications still to be found
    count = num_mods

    # Keep a running list of modifications
    ls_mods = copy.deepcopy(tree.modifications)

    # Initialize the baseline reward threshold handed to each tree
    baseline = 10.12

    assert (count >= num_mods_per_run)
    assert (len(ls_num_iters) == num_mods_per_run)

    while count > 0:
        print(colored(ls_mods, "red"))
        n = 0
        if count >= num_mods_per_run:
            n = num_mods_per_run

        else:
            n = count

        tree = Tree(ref, max_layer=n)
        tree.initialize()
        tree.threshold = baseline

        tree.modifications = copy.deepcopy(ls_mods)

        # Find out number of iterations
        num_iter = ls_num_iters[n - 1]

        # Perform an MCTS search
        tree.ucb_search(iterations=num_iter)

        a = tree.best_observed_choice()
        for elem in a[0]:
            mods_ret.append(elem)

        # Transform the environment
        for elem in a[0]:
            if elem[0] == 0:  # jump cell
                ref.jump_cells.append((elem[1], elem[2]))

            elif elem[0] == 1:  # special cell
                ref.special.append((elem[1], elem[2]))

            ls_mods.remove(elem)

        count -= n

        # Increase baseline
        baseline += 0.4 * n

    # Find utility
    agent = w_QAgent(ref)
    agent.qlearn(3000)
    rews = utility(agent)

    return (mods_ret, rews)
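
A hedged usage sketch for batch_greedy; the argument values are illustrative, chosen only to satisfy the asserts above (num_mods >= num_mods_per_run and len(ls_num_iters) == num_mods_per_run):

def batch_greedy_sketch():
    # Hypothetical example, not part of the original file
    env = WindyGridworld()

    # Find 4 modifications in total, searching for at most 2 per MCTS run;
    # one-modification runs use 50 iterations, two-modification runs use 200
    mods, reward = batch_greedy(env,
                                num_mods=4,
                                num_mods_per_run=2,
                                ls_num_iters=[50, 200])
    return mods, reward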