Example #1
0
def main():
    """
    Train a Q-learning model from recorded games and evaluate the result.

    Switches the working directory to the folder containing this script,
    trains into ``data/qtables/<observation file name>`` from the recorded
    games, then runs and analyzes evaluation trials for the ``testing_only``
    agent, storing them under ``<write_path>/evaluations``.
    :return:
    """
    # abspath() guards against a bare relative __file__ (e.g. "script.py"):
    # its dirname is "" and os.chdir("") would raise.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))
    cwd = os.getcwd()

    obs = ObservationObject(0, [
        'd_closest_coin_dir', 'd_closest_safe_field_dir', 'me_has_bomb',
        'dead_end_detect', 'd4_is_safe_to_move_a_l', 'd4_is_safe_to_move_b_r',
        'd4_is_safe_to_move_c_u', 'd4_is_safe_to_move_d_d',
        'd_best_bomb_dropping_dir', 'd_closest_enemy_dir'
    ], None)

    write_path = 'data/qtables/' + obs.get_file_name_string()

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(write_path, exist_ok=True)

    # The returned tables are not needed here; training persists its results
    # under write_path.
    q_train_from_games_jakob(
        cwd + "/" + 'data/games/four_players_esa_0_2_cratedens_0_75',
        write_path,
        obs,
        a=0.5,
        g=0.7,
        save_every_n_files=5,
        stop_after_n_files=200)

    env = EvaluationEnvironment(["testing_only"], write_path + "/evaluations")
    env.run_trials(add_folder=True)
    env.analyze_games()
Example #2
0
def get_step_count_from_filename(filename: str):
    """
    Given the name of the last numpy file trained with, return the step count of the model up until that point.

    Looks the file up in records.json, converts its 1-based position into a
    save index (one save every ``save_every`` files) and returns the matching
    cumulative value from the first element of progress.json.
    :param filename: Filename (should end in .npy)
    :return: Cumulative step count at the last save at or before the file;
        0 if the file comes before the first save.
    :raise FileNotFoundError: if filename is not listed in records.json
    """

    obs0 = ObservationObject(0, [
        'd_closest_coin_dir', 'd_closest_safe_field_dir', 'me_has_bomb',
        'dead_end_detect', 'd4_is_safe_to_move_a_l', 'd4_is_safe_to_move_b_r',
        'd4_is_safe_to_move_c_u', 'd4_is_safe_to_move_d_d',
        'd_best_bomb_dropping_dir', 'd_closest_enemy_dir'
    ], None)

    table_dir = "data/qtables/" + obs0.get_file_name_string()
    progress_path = table_dir + "/progress.json"
    records_path = table_dir + "/records.json"

    save_every = 5

    with open(progress_path, "r") as f:
        steps, _ = json.load(f)

    with open(records_path, "r") as f:
        names = json.load(f)

    # Turn the per-save increments into a running total.
    for ind in range(1, len(steps)):
        steps[ind] += steps[ind - 1]

    try:
        # 1-based position of the file among the known training files.
        target_name_index = list(names).index(filename) + 1
    except ValueError:
        raise FileNotFoundError(filename) from None

    # Use save_every rather than a second hard-coded 5 so both stay in sync.
    if target_name_index % save_every != 0:
        print("Warning: target name index (starting at 1) is",
              target_name_index)

    step_index = target_name_index // save_every

    if step_index == 0:
        # No save has happened yet; steps[-1] would wrongly return the
        # final total through negative indexing.
        return 0

    return steps[step_index - 1]
Example #3
0
def rewrite_jsons(evaluations_folder: str):
    """
    Intermediate helper that rewrites every current_steps.json below the evaluations folder.

    Each file is expected to hold a number of training files seen so far
    (originally 200, 400, 600 ..); it is replaced by the cumulative value
    from progress.json for the corresponding save (one save every 5 files).
    :param evaluations_folder Examine all subdirectories here.
        NOTE: the value passed in is currently overwritten by the
        evaluations folder of the hard-coded observation setup below.
    :return:
    """

    observation = ObservationObject(0, [
        'd_closest_coin_dir', 'd_closest_safe_field_dir', 'me_has_bomb',
        'dead_end_detect', 'd4_is_safe_to_move_a_l', 'd4_is_safe_to_move_b_r',
        'd4_is_safe_to_move_c_u', 'd4_is_safe_to_move_d_d',
        'd_best_bomb_dropping_dir', 'd_closest_enemy_dir'
    ], None)

    table_dir = "data/qtables/" + observation.get_file_name_string()
    evaluations_folder = table_dir + "/evaluations"

    # Collect the immediate subdirectories of the evaluations folder.
    subdirs = []
    for entry in os.listdir(evaluations_folder):
        candidate = os.path.join(evaluations_folder, entry)
        if os.path.isdir(candidate):
            subdirs.append(candidate)

    save_every = 5

    with open(table_dir + "/progress.json", "r") as progress_file:
        progress, _ = json.load(progress_file)

    # In-place running total of the loaded progress values.
    for position in range(1, len(progress)):
        progress[position] += progress[position - 1]

    for subdir in subdirs:
        stepcount_file = subdir + "/" + "current_steps.json"

        with open(stepcount_file, "r") as handle:
            files_seen = json.load(handle)

        save_number = files_seen // save_every

        with open(stepcount_file, "w") as handle:
            json.dump(progress[save_number - 1], handle)
Example #4
0
    def _run_training(self, radius, feature_combination: np.array):
        """
        Run Q learning for one radius / feature combination on the configured training data.

        The results directory is set to a subfolder of the global output
        directory named after the observation setup, and cleared again once
        training has finished.
        :param radius: Radius passed to the ObservationObject
        :param feature_combination: Feature selection passed to the ObservationObject
        :raise RuntimeError if training directory not initialized
        :return:
        """

        if self.training_data_dir is None:
            raise RuntimeError("Training directory not set")

        observation = ObservationObject(radius, None, feature_combination)

        output_dir = (self.global_output_dir + "/" +
                      observation.get_file_name_string())
        self.set_results_output_dir(output_dir)

        q_train_from_games(
            self.training_data_dir,
            self.results_output_dir,
            observation,
        )

        self.clear_results_output_dir()
Example #5
0
def main():
    """
    Train and dump regression models from data

    Builds the paths of the observation, Q-table and quantity arrays of the
    hard-coded observation setup, then trains a decision tree and a kernel
    regression on them, writing each model below ``data/regression_results``.
    :return:
    """

    obs0 = ObservationObject(0, [
        'd_closest_coin_dir', 'd_closest_safe_field_dir', 'me_has_bomb',
        'dead_end_detect', 'd4_is_safe_to_move_a_l', 'd4_is_safe_to_move_b_r',
        'd4_is_safe_to_move_c_u', 'd4_is_safe_to_move_d_d',
        'd_best_bomb_dropping_dir', 'd_closest_enemy_dir'
    ], None)

    # abspath() guards against a bare relative __file__ (e.g. "script.py"):
    # its dirname is "" and os.chdir("") would raise.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))
    cwd = os.getcwd()

    name = obs0.get_file_name_string()
    path = cwd + "/data/qtables/" + name + "/"

    obspath = path + "observation-" + name + ".npy"
    qpath = path + "q_table-" + name + ".npy"
    quantpath = path + "quantity-" + name + ".npy"

    outdir = "data/regression_results"

    cutoff = 20

    # The returned models are not used here; each trainer receives its own
    # output path.
    train_decision_tree(obspath, qpath, quantpath,
                        outdir + "/2019-03-19 17-12-25_2258.dt.p", cutoff)
    train_kernel_regression(obspath, qpath, quantpath,
                            outdir + "/2019-03-19 17-12-25_2258.kr.p",
                            cutoff)

    print()
Example #6
0
def setup(agent):
    """
    Attach the observation configuration to the agent.

    Stores an ObservationObject with radius 1 and the feature list below in
    ``agent.obs_object``.
    :param agent: Agent object that receives the ``obs_object`` attribute
    :return:
    """
    # Disabled features: 'd_closest_enemy_dir', 'd_closest_crate_dir'
    features = [
        'd_closest_coin_dir',
        'd_closest_safe_field_dir',
        'me_has_bomb',
        'dead_end_detect',
        'd4_is_safe_to_move_a_l',
        'd4_is_safe_to_move_b_r',
        'd4_is_safe_to_move_c_u',
        'd4_is_safe_to_move_d_d',
        'd_best_bomb_dropping_dir',
    ]
    agent.obs_object = ObservationObject(1, features, None)
Example #7
0
def q_train_from_games_jakob(train_data,
                             write_path,
                             obs: ObservationObject,
                             a=0.5,
                             g=0.6):
    """
    Trains from all files in a directory using an existing q- and observation-table under write_path.

    If tables do not exist (or cannot be read), starts from empty tables.

    Creates json files indexing known training files under write_path.

    Uses preconfigured ObservationObject to train.

    After every training file the observation table and the Q table are
    saved back to write_path.

    :param train_data: Directory containing training episodes
    :param write_path: Directory to which to output Q-learning results and json files.
    :param obs Observation Object containing training settings (view radius etc.)
    :param a alpha (learning rate)
    :param g gamma (discount)
    :return: (KNOWN, QTABLE) - the observation table and the Q table
    """

    filename = obs.get_file_name_string()

    # np.save() appends ".npy" when the name lacks it, so the same suffix has
    # to be used for loading; otherwise previously saved tables are never found.
    suffix = '' if filename.endswith('.npy') else '.npy'
    try:
        QTABLE = np.load(write_path + '/q_table-' + filename + suffix)
        KNOWN = np.load(write_path + '/observation-' + filename + suffix)
    except (OSError, ValueError, EOFError):
        # Missing or unreadable tables: fall back to empty ones.
        print("Error loading learned q table. using empty table instead.")
        QTABLE = np.zeros([0, 6])
        KNOWN = np.zeros([0, obs.obs_length])

    for file in [
            f for f in listdir(train_data) if isfile(join(train_data, f))
    ]:
        # go through files

        try:
            if is_trained(write_path + "/records.json", file):
                print("Skipping known training datum", file, "in folder",
                      write_path)
                continue
        except IOError:
            # Best effort: without readable records the file is trained anyway.
            print("Error accessing .json records for file", file, "in folder",
                  train_data)

        try:
            game = np.load(train_data + "/" + file)
        except (OSError, ValueError, EOFError):
            # Depending on the numpy version, a non-.npy file surfaces as
            # OSError or ValueError.
            print("Skipping " + file + ". Is it a .npy file?")
            continue

        these_actions = np.zeros(4)

        # Per player: (table indices, rotations) of the previous step.
        last_index = [None, None, None, None]

        for ind, step_state in enumerate(game):

            obs.set_state(step_state)
            living_players = np.arange(4)[np.where(obs.player_locs != 0)]

            # Snapshot the actions of the previous step before overwriting.
            last_actions = these_actions.astype(int)

            for player in range(4):
                # Offset of this player's data inside the state vector.
                # NOTE(review): presumably 21 entries per player - confirm
                # against the state layout.
                base = obs.board.shape[0] + player * 21
                actions_taken = step_state[int(base + 4):int(base + 9)]
                actions_taken = np.append(actions_taken,
                                          step_state[int(base + 11)])
                these_actions[player] = np.argmax(actions_taken)

                if ind != 0 and player in living_players and actions_taken[
                        np.where(actions_taken != 0)].shape[0] != 1:
                    print(
                        "Warning: Incorrect number of actions taken in one step for player index",
                        player, "in step"
                        " number", ind, "in file", file)

            step_observations = obs.create_observation(living_players)

            for count, observation in enumerate(step_observations):

                findings = np.searchsorted(KNOWN, observation)

                if KNOWN.shape[0] > 0 and np.array_equal(
                        KNOWN[findings], observation
                ):  # if we were able to locate the observation in the table

                    candidates, rotations_current = get_transformations(
                        observation, obs.radius,
                        obs.get_direction_sensitivity())

                    sort = np.argsort(candidates)
                    candidates = candidates[sort]
                    rotations_current = rotations_current[sort]

                    index_current = np.zeros(8)

                    index_current[7] = findings

                    # A dedicated loop variable keeps the outer step index
                    # `ind` intact for the `ind > 0` check further down.
                    for cand_ind, cand in enumerate(candidates):

                        if cand_ind == 7:
                            continue  # last candidate is original observation

                        cand_pos = np.searchsorted(KNOWN, cand)

                        index_current[cand_ind] = cand_pos

                else:  # we were unable to find the observation
                    new_obs, rotations_current = get_transformations(
                        observation, obs.radius,
                        obs.get_direction_sensitivity())
                    sort = np.argsort(new_obs)
                    new_obs = new_obs[sort]
                    rotations_current = rotations_current[sort]

                    n_new_indices = new_obs.shape[0]
                    insertion_points = np.array(
                        [np.searchsorted(KNOWN, single) for single in new_obs])

                    KNOWN = np.insert(KNOWN, insertion_points, new_obs)
                    QTABLE = np.insert(
                        QTABLE, insertion_points,
                        np.zeros((n_new_indices, QTABLE.shape[1])))

                    # Every earlier insertion shifts the later entries by one.
                    index_current = np.array([
                        point + shift
                        for (point, shift) in zip(insertion_points,
                                                  range(n_new_indices))
                    ])

                if ind > 0:
                    # Q-learning update of the previous step's entries for
                    # this player, using the best value of the current state.
                    player_id = living_players[count]
                    last_action = int(last_actions[player_id])

                    for i in range(last_index[player_id][0].shape[0]):
                        l_ind = last_index[player_id][0][i]
                        l_rot = last_index[player_id][1][i]
                        best_choice_current_state = np.max(QTABLE[int(
                            index_current[0])])

                        row, col = int(l_ind), int(l_rot[last_action])
                        QTABLE[row, col] = (1 - a) * QTABLE[row, col] + \
                            a * (get_reward(step_state, player_id) +
                                 g * best_choice_current_state)

                last_index[living_players[count]] = (index_current,
                                                     rotations_current)

        add_to_trained(write_path + "/records.json", file)  # update json table

        print("Trained with file", file)

        np.save(write_path + '/observation-' + filename, KNOWN)
        np.save(write_path + '/q_table-' + filename, QTABLE)

    return KNOWN, QTABLE
Example #8
0
def main():
    """
    Show one observation-frequency histogram per observation setup.

    Three setups (radius 0, 1 and 3) are plotted one after another; each
    figure is titled with the radius and the number of binary and directional
    features and reads its data from the setup's ``quantity-*.npy`` file.
    The first setup is read from the folder with the ``OLD`` suffix.
    :return:
    """

    # Disabled for this setup: 'd_closest_crate_dir', 'd_closest_enemy_dir'
    features_radius_0 = [
        'd_closest_coin_dir',
        'd_closest_safe_field_dir',
        'd_best_bomb_dropping_dir',
        'me_has_bomb',
        'd4_is_safe_to_move_a_l',
        'd4_is_safe_to_move_b_r',
        'd4_is_safe_to_move_c_u',
        'd4_is_safe_to_move_d_d',
        'dead_end_detect',
    ]
    obs0 = ObservationObject(0, features_radius_0, None)

    # Disabled for this setup: 'd_closest_crate_dir'
    features_radius_1 = [
        'd_closest_coin_dir',
        'd_closest_safe_field_dir',
        'd_best_bomb_dropping_dir',
        'me_has_bomb',
        'd4_is_safe_to_move_a_l',
        'd4_is_safe_to_move_b_r',
        'd4_is_safe_to_move_c_u',
        'd4_is_safe_to_move_d_d',
        'dead_end_detect',
        'd_closest_enemy_dir',
    ]
    obs1 = ObservationObject(1, features_radius_1, None)

    # Disabled for this setup: 'd_best_bomb_dropping_dir', 'dead_end_detect',
    # 'd_closest_crate_dir', 'd_closest_enemy_dir'
    features_radius_3 = [
        'd_closest_coin_dir',
        'd_closest_safe_field_dir',
        'me_has_bomb',
        'd4_is_safe_to_move_a_l',
        'd4_is_safe_to_move_b_r',
        'd4_is_safe_to_move_c_u',
        'd4_is_safe_to_move_d_d',
    ]
    obs3 = ObservationObject(3, features_radius_3, None)

    # (radius, number of binary features, number of directional features)
    setups = zip((obs0, obs1, obs3), ((0, 6, 3), (1, 6, 4), (3, 5, 2)))

    plt.rcParams.update({'font.size': 16})
    labelsize = 16

    for position, (observation, description) in enumerate(setups):
        name = observation.get_file_name_string()
        folder = "data/qtables/" + name + "/"

        if position == 0:
            # Swap the trailing slash for the "OLD" folder suffix.
            folder = folder[:-1] + "OLD/"

        radius, n_binary, n_directional = description
        title = (f"Radius {radius} with {n_binary} binary features \n"
                 f"and {n_directional} directional features")

        plt.xticks(fontsize=labelsize)
        plt.yticks(fontsize=labelsize)

        plt.title(title)
        set_quantities_histogram(folder + "quantity-" + name + ".npy")
        plt.xlabel("Observation frequency")
        plt.ylabel("Count")
        plt.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))

        plt.show()
Example #9
0
import json
import numpy as np
from state_functions.rewards import event_rewards
import matplotlib.pyplot as plt

from evaluation_environment import EvaluationEnvironment
from agent_code.observation_object import ObservationObject

# Observation setup (radius 0) shared by this module.
obs0 = ObservationObject(
    0,
    [
        'd_closest_coin_dir',
        'd_closest_safe_field_dir',
        'me_has_bomb',
        'dead_end_detect',
        'd4_is_safe_to_move_a_l',
        'd4_is_safe_to_move_b_r',
        'd4_is_safe_to_move_c_u',
        'd4_is_safe_to_move_d_d',
        'd_best_bomb_dropping_dir',
        'd_closest_enemy_dir',
    ],
    None,
)


def main():
    """
    Run evaluation trials into the ``reg_evals`` folder and sum up rewards.

    After the trials are analyzed, the saved event and duration arrays are
    loaded and one summed reward (events weighted by ``event_rewards``) is
    computed per entry of the event array.
    :return:
    """
    output_dir = "data/qtables/" + obs0.get_file_name_string() + "/reg_evals"

    env = EvaluationEnvironment(["testing_only"], output_dir)
    env.run_trials(add_folder=True)
    _, _, events_path, durations_path = env.analyze_games()

    events = np.load(events_path)
    durations = np.load(durations_path)

    rewards = []
    for game_events in events:
        rewards.append(np.sum(game_events * event_rewards))

Example #10
0
    def analyze_games(self,
                      destroy_data: bool = False,
                      print_steps_trained_with=None):
        """
        Analyze all games in self.save_directory and save summary files there.

        Writes events.npy (event counts of the first agent of interest, one
        row per game) and durations.npy (number of steps per game), plus
        current_steps.json when print_steps_trained_with is given. Summary
        files already present in the directory are neither analyzed as games
        nor removed by destroy_data.
        :param destroy_data: If True, delete games after analysis
        :param print_steps_trained_with: If not None, write the current number of steps the model under this filepath has trained with to current_steps.json.
        :return: event_count_by_game, game_durations, events_path, durations_path
        """
        # Files written by this method itself; listing them as games would
        # make destroy_data delete the freshly written summary.
        summary_files = ("events.npy", "durations.npy", "current_steps.json")

        files = [
            f for f in listdir(self.save_directory)
            if isfile(join(self.save_directory, f)) and f not in summary_files
        ]

        event_count_by_game = []  # store event counts here

        game_durations = []

        for file in files:
            eventcount = np.zeros((len(self.agent_names), 17)).astype(int)
            try:
                game = np.load(self.save_directory + "/" + file)
            except (OSError, ValueError, EOFError):
                # Depending on the numpy version, a non-.npy file surfaces
                # as OSError or ValueError.
                print("Skipping " + file + ". Is it a .npy file?")
                continue

            obs = ObservationObject(1, [], None)

            step_num = 0
            for step in game:
                obs.set_state(step)
                playersliving = False

                for player in range(len(self.agent_names)
                                    ):  # track events for agents of interest
                    if player in obs.living_players or player in obs.just_died:
                        eventcount[player] += obs.events[player]
                        playersliving = True

                step_num += 1
                if not playersliving:
                    break  # all players of interest are dead
            game_durations.append(step_num)
            event_count_by_game.append(eventcount)

        # Only the first agent's event counts are kept per game.
        event_count_by_game, game_durations = np.array(
            [ec[0] for ec in event_count_by_game]), np.array(game_durations)
        events_path = self.save_directory + "/" + "events.npy"
        durations_path = self.save_directory + "/" + "durations.npy"

        np.save(events_path, event_count_by_game)
        np.save(durations_path, game_durations)

        if print_steps_trained_with is not None:
            steps_trained_path = self.save_directory + "/" + "current_steps.json"
            with open(steps_trained_path, "w") as f:
                json.dump(
                    self.return_steps_trained_with(print_steps_trained_with),
                    f)

        print("Wrote game info to", events_path, "and", durations_path)

        if destroy_data:  # remove files from disk
            print("Removing game data")
            for file in files:
                remove(self.save_directory + "/" + file)

        return event_count_by_game, game_durations, events_path, durations_path
Example #11
0
def main(data_path='data/games/four_players_esa_0_2_cratedens_0_75',
         train_iterations=40,
         train_batch_size=10
         ):  # ='data/games/four_players_esa_0_2_cratedens_0_75/'):
    """
    Train an agent from the ground up and evaluate their performance every few games.
    Saves all files in a subdirectory of the agent's folder.

    Supports training + evaluation from pre-existing data (e.g. simple agents), but also training through self-play.

    Each iteration optionally creates new self-play games, trains the Q table
    from the data directory and, on every second iteration (starting with the
    first), runs and analyzes evaluation trials in a time-stamped folder.
    :param data_path: If not None, take training data from here (should contain multiples of 100 games) -> Else perform
    self-play to learn
    :param train_iterations: How many training cycles to go through
    :param train_batch_size: How many files to train from in one cycle
    :return:
    """

    obs = ObservationObject(
        0,
        [
            'd_closest_coin_dir',
            'd_closest_safe_field_dir',
            'd_best_bomb_dropping_dir',
            #'d_closest_crate_dir',
            'me_has_bomb',
            'd4_is_safe_to_move_a_l',
            'd4_is_safe_to_move_b_r',
            'd4_is_safe_to_move_c_u',
            'd4_is_safe_to_move_d_d',
            'dead_end_detect',
        ],
        None)
    if data_path is None:
        # Self-play mode: games are generated into a per-iteration folder.
        create_data = True
        _data_path = "data/games/SELFPLAY" + obs.get_file_name_string()

    else:
        # Pre-computed data: data_path is used unchanged in every iteration.
        create_data = False
        _data_path = None

    # Time-stamped so that repeated runs do not share an evaluation folder.
    evaluation_data_path = 'data/qtables/' + obs.get_file_name_string(
    ) + "/evaluategames" + datetime.datetime.fromtimestamp(
        time()).strftime('%Y-%m-%d %H-%M-%S')

    # where to put the evaluation games

    for i in range(train_iterations):

        iteration_str = "/ITERATION_" + str(i + 1)

        if create_data:

            data_path = _data_path + iteration_str

            # All four players are the 'testing_only' agent.
            training_setup = [('testing_only', False), ('testing_only', False),
                              ('testing_only', False), ('testing_only', False)]

            # NOTE(review): `s` and `main_save_states` are defined outside
            # this block - presumably the game settings module and the
            # state-saving game runner; confirm.
            print("Creating", s.n_rounds,
                  "training episodes by playing agent against itself")

            main_save_states.main(
                training_setup, data_path
            )  # Important: Set settings.py n_rounds to train batch size

        if create_data:
            print("Training from", train_batch_size, "newly played games in",
                  data_path)
        else:
            print("Training from", train_batch_size, "pre-computed games in",
                  data_path)

        q_train_from_games_jakob(data_path,
                                 "data/qtables/" + obs.get_file_name_string(),
                                 obs,
                                 a=0.5,
                                 g=0.5,
                                 stop_after_n_files=train_batch_size,
                                 save_every_n_files=5)

        # Evaluate on every second iteration, starting with the first.
        if i % 2 == 0:
            q_table_loc = "data/qtables/" + obs.get_file_name_string()

            iter_output = evaluation_data_path + iteration_str

            env = EvaluationEnvironment(["testing_only"], iter_output)

            # NOTE(review): `sae_s` is defined outside this block -
            # presumably the settings used for the evaluation games; confirm.
            print("Running", sae_s.n_rounds, "trials vs. simple agents")

            env.run_trials()

            env.analyze_games(destroy_data=False,
                              print_steps_trained_with=q_table_loc
                              )  # save an analysis in iter_output,