Ejemplo n.º 1
0
def get_step_count_from_filename(filename: str):
    """
    Given the name of the last numpy file trained with, return the step count of the model up until that point.

    Two JSON files are read from the q-table directory of the hard-coded observation configuration:
    progress.json (a two-element list whose first element is a list of step counts; these are accumulated here
    and indexed once per `save_every` training files) and records.json (searched for `filename`).

    :param filename: Filename (should end in .npy)
    :return: Accumulated step count at the last save at or before the given file; 0 if the file comes before the
    first save
    :raise FileNotFoundError if filename is not listed in records.json
    """
    from itertools import accumulate

    obs0 = ObservationObject(0, [
        'd_closest_coin_dir', 'd_closest_safe_field_dir', 'me_has_bomb',
        'dead_end_detect', 'd4_is_safe_to_move_a_l', 'd4_is_safe_to_move_b_r',
        'd4_is_safe_to_move_c_u', 'd4_is_safe_to_move_d_d',
        'd_best_bomb_dropping_dir', 'd_closest_enemy_dir'
    ], None)

    table_dir = "data/qtables/" + obs0.get_file_name_string()
    progress_path = table_dir + "/progress.json"
    records_path = table_dir + "/records.json"

    save_every = 5

    with open(progress_path, "r") as f:
        steps, _ = json.load(f)

    with open(records_path, "r") as f:
        names = json.load(f)

    # Running total: entry k holds the steps accumulated over entries 0..k.
    cumulative_steps = list(accumulate(steps))

    # 1-based position of the first record equal to filename (None if absent).
    target_name_index = next(
        (ind + 1 for ind, name in enumerate(names) if name == filename), None)

    if target_name_index is None:
        raise FileNotFoundError(filename)

    if target_name_index % save_every != 0:
        print("Warning: target name index (starting at 1) is",
              target_name_index)

    step_index = target_name_index // save_every

    if step_index == 0:
        # No complete batch of `save_every` files precedes this file. Indexing
        # with step_index - 1 == -1 would silently return the final total.
        return 0

    return cumulative_steps[step_index - 1]
Ejemplo n.º 2
0
def main():
    """
    Train a Q-learning model from recorded games and evaluate the result.

    Switches the working directory to the directory containing this file, makes sure the q-table output
    directory exists, runs training on the recorded games and finally runs and analyzes evaluation trials
    whose output goes to the "evaluations" subfolder of the q-table directory.
    :return:
    """
    script_dir = os.path.dirname(__file__)
    os.chdir(script_dir)
    cwd = os.getcwd()

    feature_names = [
        'd_closest_coin_dir', 'd_closest_safe_field_dir', 'me_has_bomb',
        'dead_end_detect', 'd4_is_safe_to_move_a_l', 'd4_is_safe_to_move_b_r',
        'd4_is_safe_to_move_c_u', 'd4_is_safe_to_move_d_d',
        'd_best_bomb_dropping_dir', 'd_closest_enemy_dir'
    ]
    obs = ObservationObject(0, feature_names, None)

    # Output location is relative to the working directory set above.
    write_path = 'data/qtables/' + obs.get_file_name_string()
    if not os.path.exists(write_path):
        os.makedirs(write_path)

    training_games = cwd + "/" + 'data/games/four_players_esa_0_2_cratedens_0_75'
    training_options = dict(a=0.5,
                            g=0.7,
                            save_every_n_files=5,
                            stop_after_n_files=200)
    KNOWN, Q = q_train_from_games_jakob(training_games, write_path, obs,
                                        **training_options)

    evaluation_dir = write_path + "/evaluations"
    env = EvaluationEnvironment(["testing_only"], evaluation_dir)
    env.run_trials(add_folder=True)
    env.analyze_games()
Ejemplo n.º 3
0
def rewrite_jsons(evaluations_folder: str):
    """
    Intermediate function used to rewrite current_steps.json (originally set to 200, 400, 600 ..)

    Every current_steps.json found in a direct subdirectory of the evaluations folder is read as a number of
    training files, converted to a number of completed save increments (one per `save_every` files) and
    overwritten with the accumulated step count from progress.json for that increment.

    :param evaluations_folder Examine all subdirectories here (see the review note in the body: the value is
    currently replaced by the evaluations folder of the hard-coded observation configuration)
    :return:
    """
    from itertools import accumulate

    obs0 = ObservationObject(0, [
        'd_closest_coin_dir', 'd_closest_safe_field_dir', 'me_has_bomb',
        'dead_end_detect', 'd4_is_safe_to_move_a_l', 'd4_is_safe_to_move_b_r',
        'd4_is_safe_to_move_c_u', 'd4_is_safe_to_move_d_d',
        'd_best_bomb_dropping_dir', 'd_closest_enemy_dir'
    ], None)

    table_dir = "data/qtables/" + obs0.get_file_name_string()

    # NOTE(review): the caller's argument is discarded here. Kept as-is because
    # this function rewrites files in place and existing callers may rely on
    # the fixed location - confirm before honoring the parameter.
    evaluations_folder = table_dir + "/evaluations"

    subdirs = [
        os.path.join(evaluations_folder, o)
        for o in os.listdir(evaluations_folder)
        if os.path.isdir(os.path.join(evaluations_folder, o))
    ]

    save_every = 5

    with open(table_dir + "/progress.json", "r") as f:
        progress, _ = json.load(f)

    # Running total: entry k holds the steps accumulated over entries 0..k.
    cumulative_steps = list(accumulate(progress))

    for subdir in subdirs:
        stepcount_file = subdir + "/" + "current_steps.json"

        with open(stepcount_file, "r") as f:
            number_of_files_so_far = json.load(f)

        save_increment_count = number_of_files_so_far // save_every

        # Resolve the new value BEFORE reopening the file for writing: opening
        # with "w" truncates it, so a failed lookup must not leave it empty.
        if save_increment_count == 0:
            # No completed save increment; index -1 would pick the final total.
            steps_so_far = 0
        else:
            steps_so_far = cumulative_steps[save_increment_count - 1]

        with open(stepcount_file, "w") as f:
            json.dump(steps_so_far, f)
Ejemplo n.º 4
0
def main():
    """
    Fit regression models on the stored Q-learning tables and dump them to disk.

    The observation, q-table and quantity arrays are taken from the q-table directory of the hard-coded
    observation configuration (resolved against the directory of this file); a decision tree and a kernel
    regression are trained from them and written below data/regression_results.
    :return:
    """
    feature_names = [
        'd_closest_coin_dir', 'd_closest_safe_field_dir', 'me_has_bomb',
        'dead_end_detect', 'd4_is_safe_to_move_a_l', 'd4_is_safe_to_move_b_r',
        'd4_is_safe_to_move_c_u', 'd4_is_safe_to_move_d_d',
        'd_best_bomb_dropping_dir', 'd_closest_enemy_dir'
    ]
    obs0 = ObservationObject(0, feature_names, None)

    os.chdir(os.path.dirname(__file__))
    cwd = os.getcwd()

    path = cwd + "/data/qtables/" + obs0.get_file_name_string() + "/"
    name = obs0.get_file_name_string()

    # All three input tables share the directory and the name suffix.
    obspath, qpath, quantpath = [
        path + prefix + name + ".npy"
        for prefix in ("observation-", "q_table-", "quantity-")
    ]

    outdir = "data/regression_results"
    cutoff = 20

    tree_target = outdir + "/2019-03-19 17-12-25_2258.dt.p"
    kernel_target = outdir + "/2019-03-19 17-12-25_2258.kr.p"

    # Decision-forest training is currently disabled; only these two models run.
    dt = train_decision_tree(obspath, qpath, quantpath, tree_target, cutoff)
    kr = train_kernel_regression(obspath, qpath, quantpath, kernel_target,
                                 cutoff)

    print()
Ejemplo n.º 5
0
    def _run_training(self, radius, feature_combination: np.array):
        """
        Once training data are set, run Q learning a specific radius and feature combination.
        :raise RuntimeError if training directory not initialized
        :return:
        """

        if self.training_data_dir is None:
            raise RuntimeError("Training directory not set")

        obs = ObservationObject(radius, None, feature_combination)

        self.set_results_output_dir(self.global_output_dir + "/" +
                                    obs.get_file_name_string())

        q_train_from_games(self.training_data_dir, self.results_output_dir,
                           obs)

        self.clear_results_output_dir()
Ejemplo n.º 6
0
def q_train_from_games_jakob(train_data,
                             write_path,
                             obs: ObservationObject,
                             a=0.5,
                             g=0.6):
    """
    Trains from all files in a directory using an existing q- and observation-table under write_path.

    If tables do not exist, creates them.

    Creates json files indexing known training files under write_path.

    Uses preconfigured ObservationObject to train.

    The tables are stored as 'q_table-<name>.npy' and 'observation-<name>.npy' under write_path, where <name> is
    obs.get_file_name_string(). Both are saved again after every training file.

    :param train_data: Directory containing training episodes
    :param write_path: Directory to which to output Q-learning results and json files.
    :param obs Observation Object containing training settings (view radius etc.)
    :param a alpha (learning rate)
    :param g gamma (discount)
    :return: Tuple (KNOWN, QTABLE) of the observation table and the q-table after training
    """

    filename = obs.get_file_name_string()

    def table_path(prefix):
        # np.save appends ".npy" when the name lacks it, np.load does not.
        # Using one explicit path for both makes a saved table loadable again.
        path = write_path + prefix + filename
        return path if path.endswith('.npy') else path + '.npy'

    q_table_path = table_path('/q_table-')
    known_path = table_path('/observation-')

    try:
        QTABLE = np.load(q_table_path)
        KNOWN = np.load(known_path)
    except (OSError, ValueError, EOFError):
        # Missing or unreadable tables: start from scratch (best effort).
        print("Error loading learned q table. using empty table instead.")
        QTABLE = np.zeros([0, 6])
        KNOWN = np.zeros([0, obs.obs_length])

    for file in [
            f for f in listdir(train_data) if isfile(join(train_data, f))
    ]:
        # go through files

        try:
            if is_trained(write_path + "/records.json", file):
                print("Skipping known training datum", file, "in folder",
                      write_path)
                continue
        except IOError:
            # Records unavailable: report it and train with the file anyway.
            print("Error accessing .json records for file", file, "in folder",
                  train_data)

        try:
            game = np.load(train_data + "/" + file)
        except OSError:
            print("Skipping " + file + ". Is it a .npy file?")
            continue

        these_actions = np.zeros(4)

        # Per player: (table indices, rotations) of the previous step.
        last_index = [None, None, None, None]

        for ind, step_state in enumerate(game):

            obs.set_state(step_state)
            living_players = np.arange(4)[np.where(obs.player_locs != 0)]

            # Actions read during the previous step; rewarded in this step.
            last_actions = these_actions.astype(int)

            for player in range(4):
                # Per-player slices of the state vector (stride 21 per player).
                actions_taken = step_state[int(obs.board.shape[0] + 4 +
                                               player *
                                               21):int(obs.board.shape[0] + 9 +
                                                       player * 21)]
                actions_taken = np.append(
                    actions_taken,
                    step_state[int(obs.board.shape[0] + 11 + player * 21)])
                these_actions[player] = np.argmax(actions_taken)

                if ind != 0 and player in living_players and actions_taken[
                        np.where(actions_taken != 0)].shape[0] != 1:
                    print(
                        "Warning: Incorrect number of actions taken in one step for player index",
                        player, "in step"
                        " number", ind, "in file", file)

            step_observations = obs.create_observation(living_players)

            for count, observation in enumerate(step_observations):

                # NOTE(review): np.searchsorted is documented for 1-D sorted
                # arrays and np.insert without `axis` flattens its input, while
                # KNOWN is created 2-D above - confirm these lookups and
                # insertions behave as intended.
                findings = np.searchsorted(KNOWN, observation)

                if KNOWN.shape[0] > 0 and np.array_equal(
                        KNOWN[findings], observation
                ):  # if we were able to locate the observation in the table

                    candidates, rotations_current = get_transformations(
                        observation, obs.radius,
                        obs.get_direction_sensitivity())

                    sort = np.argsort(candidates)
                    candidates = candidates[sort]
                    rotations_current = rotations_current[sort]

                    index_current = np.zeros(8)

                    index_current[7] = findings

                    # Dedicated loop variable: reusing `ind` here would
                    # overwrite the step index tested in `if ind > 0` below.
                    for cand_ind, cand in enumerate(candidates):

                        if cand_ind == 7:
                            continue  # last candidate is original observation

                        cand_pos = np.searchsorted(KNOWN, cand)

                        index_current[cand_ind] = cand_pos

                else:  # we were unable to find the observation
                    new_obs, rotations_current = get_transformations(
                        observation, obs.radius,
                        obs.get_direction_sensitivity())
                    sort = np.argsort(new_obs)
                    new_obs = new_obs[sort]
                    rotations_current = rotations_current[sort]

                    n_new_indices = new_obs.shape[0]
                    insertion_points = np.array([
                        np.searchsorted(KNOWN, candidate)
                        for candidate in new_obs
                    ])

                    KNOWN = np.insert(KNOWN, insertion_points, new_obs)
                    QTABLE = np.insert(
                        QTABLE, insertion_points,
                        np.zeros((n_new_indices, QTABLE.shape[1])))

                    # Each earlier insertion shifts later positions by one.
                    index_current = np.array([
                        point + shift
                        for (point, shift) in zip(insertion_points,
                                                  range(n_new_indices))
                    ])

                if ind > 0:
                    # Q-learning update of the previous step's entries using
                    # the reward and best q-value of the current state.
                    for i in range(
                            last_index[living_players[count]][0].shape[0]):
                        l_ind, l_rot = last_index[living_players[count]][0][
                            i], last_index[living_players[count]][1][i]
                        best_choice_current_state = np.max(QTABLE[int(
                            index_current[0])])

                        QTABLE[int(l_ind), int(l_rot[int(last_actions[living_players[count]])])] = (1 - a) *  \
                        QTABLE[int(l_ind), int(l_rot[int(last_actions[living_players[count]])])] +\
                        a * (get_reward(step_state, living_players[count]) + g * best_choice_current_state)

                last_index[living_players[count]] = (index_current,
                                                     rotations_current)

        add_to_trained(write_path + "/records.json", file)  # update json table

        print("Trained with file", file)

        np.save(known_path, KNOWN)
        np.save(q_table_path, QTABLE)

    return KNOWN, QTABLE
Ejemplo n.º 7
0
def main(data_path='data/games/four_players_esa_0_2_cratedens_0_75',
         train_iterations=40,
         train_batch_size=10
         ):  # ='data/games/four_players_esa_0_2_cratedens_0_75/'):
    """
    Train an agent from the ground up and evaluate their performance every few games.
    Saves all files in a subdirectory of the agent's folder.

    Supports training + evaluation from pre-existing data (e.g. simple agents), but also training through self-play.
    Self-play is only used when data_path is explicitly passed as None (the default is a path to recorded games).
    :param data_path: If not None, take training data from here (should contain multiples of 100 games) -> Else perform
    self-play to learn
    :param train_iterations: How many training cycles to go through
    :param train_batch_size: How many files to train from in one cycle
    :return:
    """

    obs = ObservationObject(
        0,
        [
            'd_closest_coin_dir',
            'd_closest_safe_field_dir',
            'd_best_bomb_dropping_dir',
            #'d_closest_crate_dir',
            'me_has_bomb',
            'd4_is_safe_to_move_a_l',
            'd4_is_safe_to_move_b_r',
            'd4_is_safe_to_move_c_u',
            'd4_is_safe_to_move_d_d',
            'dead_end_detect',
        ],
        None)
    if data_path is None:
        # Self-play mode: games are generated per iteration below this folder.
        create_data = True
        _data_path = "data/games/SELFPLAY" + obs.get_file_name_string()

    else:
        # Pre-recorded mode: data_path is used unchanged in every iteration.
        create_data = False
        _data_path = None

    # Evaluation output folder, made unique by a timestamp of the current time.
    evaluation_data_path = 'data/qtables/' + obs.get_file_name_string(
    ) + "/evaluategames" + datetime.datetime.fromtimestamp(
        time()).strftime('%Y-%m-%d %H-%M-%S')

    # where to put the evaluation games

    for i in range(train_iterations):

        # Suffix shared by the self-play data folder and the evaluation folder.
        iteration_str = "/ITERATION_" + str(i + 1)

        if create_data:

            data_path = _data_path + iteration_str

            # All four players are the 'testing_only' agent.
            # NOTE(review): the meaning of the second tuple element (False) is
            # not visible here - confirm against main_save_states.main.
            training_setup = [('testing_only', False), ('testing_only', False),
                              ('testing_only', False), ('testing_only', False)]

            # NOTE(review): `s` is presumably the game settings module - confirm.
            print("Creating", s.n_rounds,
                  "training episodes by playing agent against itself")

            main_save_states.main(
                training_setup, data_path
            )  # Important: Set settings.py n_rounds to train batch size

        if create_data:
            print("Training from", train_batch_size, "newly played games in",
                  data_path)
        else:
            print("Training from", train_batch_size, "pre-computed games in",
                  data_path)

        # Train on this iteration's batch; tables go to the q-table folder of
        # the observation configuration.
        q_train_from_games_jakob(data_path,
                                 "data/qtables/" + obs.get_file_name_string(),
                                 obs,
                                 a=0.5,
                                 g=0.5,
                                 stop_after_n_files=train_batch_size,
                                 save_every_n_files=5)

        # Evaluate on every second iteration, starting with the first one.
        if i % 2 == 0:
            q_table_loc = "data/qtables/" + obs.get_file_name_string()

            iter_output = evaluation_data_path + iteration_str

            env = EvaluationEnvironment(["testing_only"], iter_output)

            # NOTE(review): `sae_s` is presumably the settings module used for
            # the evaluation games - confirm.
            print("Running", sae_s.n_rounds, "trials vs. simple agents")

            env.run_trials()

            env.analyze_games(destroy_data=False,
                              print_steps_trained_with=q_table_loc
                              )  # save an analysis in iter_output,