def main():
    """
    Runs and trains a Q-learning model, then evaluates it.

    Changes the working directory to the directory of this script, trains
    from the recorded games under ``data/games/`` into
    ``data/qtables/<observation name>`` and finally runs and analyzes
    evaluation trials into the ``evaluations`` subfolder of that directory.

    :return:
    """
    # abspath() guards against a relative __file__ (e.g. ``python script.py``),
    # for which dirname() is '' and os.chdir('') raises FileNotFoundError.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))
    cwd = os.getcwd()

    obs = ObservationObject(0, [
        'd_closest_coin_dir',
        'd_closest_safe_field_dir',
        'me_has_bomb',
        'dead_end_detect',
        'd4_is_safe_to_move_a_l',
        'd4_is_safe_to_move_b_r',
        'd4_is_safe_to_move_c_u',
        'd4_is_safe_to_move_d_d',
        'd_best_bomb_dropping_dir',
        'd_closest_enemy_dir',
    ], None)

    write_path = 'data/qtables/' + obs.get_file_name_string()
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(write_path, exist_ok=True)

    # The returned tables are not needed here; training persists its results
    # under write_path.
    q_train_from_games_jakob(
        cwd + "/" + 'data/games/four_players_esa_0_2_cratedens_0_75',
        write_path,
        obs,
        a=0.5,
        g=0.7,
        save_every_n_files=5,
        stop_after_n_files=200)

    env = EvaluationEnvironment(["testing_only"], write_path + "/evaluations")
    env.run_trials(add_folder=True)
    env.analyze_games()
def get_step_count_from_filename(filename: str):
    """
    Given the name of the last numpy file trained with, return the step count
    of the model up until that point.

    Reads ``progress.json`` (a two-element JSON value whose first element is a
    list of numbers; presumably the steps trained per save interval - verify
    against the writer) and ``records.json`` (the names of the files trained
    with) from the q-table directory of the hard-coded observation setup.

    :param filename: Filename (should end in .npy)
    :return: Cumulative step count at the last save at or before the file;
             0 if the file precedes the first save.
    :raise FileNotFoundError: if filename is not listed in records.json
    """
    obs0 = ObservationObject(0, [
        'd_closest_coin_dir',
        'd_closest_safe_field_dir',
        'me_has_bomb',
        'dead_end_detect',
        'd4_is_safe_to_move_a_l',
        'd4_is_safe_to_move_b_r',
        'd4_is_safe_to_move_c_u',
        'd4_is_safe_to_move_d_d',
        'd_best_bomb_dropping_dir',
        'd_closest_enemy_dir',
    ], None)

    table_dir = "data/qtables/" + obs0.get_file_name_string()
    progress_path = table_dir + "/progress.json"
    records_path = table_dir + "/records.json"
    save_every = 5

    with open(progress_path, "r") as f:
        steps, _ = json.load(f)
    with open(records_path, "r") as f:
        names = json.load(f)

    # Turn the per-entry counts into a running total.
    for ind in range(1, len(steps)):
        steps[ind] += steps[ind - 1]

    target_name_index = next(
        (ind for ind, name in enumerate(names) if name == filename), None)
    if target_name_index is None:
        raise FileNotFoundError(filename)
    target_name_index += 1  # 1-based position of the file

    if target_name_index % save_every != 0:
        print("Warning: target name index (starting at 1) is",
              target_name_index)

    step_index = target_name_index // save_every
    if step_index == 0:
        # No save happened yet. Indexing with step_index - 1 == -1 would wrap
        # around and silently return the final cumulative count.
        return 0
    return steps[step_index - 1]
def rewrite_jsons(evaluations_folder: str):
    """
    Intermediate function used to rewrite current_steps.json (originally set
    to 200, 400, 600 ..)

    For every subdirectory, the number stored in ``current_steps.json`` is
    read as a count of training files and replaced in place by the cumulative
    step count taken from ``progress.json``.

    NOTE: not idempotent - a second run would interpret the freshly written
    step counts as file counts.

    :param evaluations_folder: Examine all subdirectories here. If empty or
        None, the ``evaluations`` folder of the hard-coded observation setup
        is used.
    :return:
    """
    obs0 = ObservationObject(0, [
        'd_closest_coin_dir',
        'd_closest_safe_field_dir',
        'me_has_bomb',
        'dead_end_detect',
        'd4_is_safe_to_move_a_l',
        'd4_is_safe_to_move_b_r',
        'd4_is_safe_to_move_c_u',
        'd4_is_safe_to_move_d_d',
        'd_best_bomb_dropping_dir',
        'd_closest_enemy_dir',
    ], None)

    table_dir = "data/qtables/" + obs0.get_file_name_string()
    # Previously the argument was overwritten unconditionally; only fall back
    # to the default location when the caller did not supply a folder.
    if not evaluations_folder:
        evaluations_folder = table_dir + "/evaluations"

    subdirs = [
        os.path.join(evaluations_folder, entry)
        for entry in os.listdir(evaluations_folder)
        if os.path.isdir(os.path.join(evaluations_folder, entry))
    ]

    save_every = 5

    with open(table_dir + "/progress.json", "r") as f:
        progress, _ = json.load(f)

    # Turn the per-entry counts into a running total.
    for ind in range(1, len(progress)):
        progress[ind] += progress[ind - 1]

    for subdir in subdirs:
        stepcount_file = subdir + "/" + "current_steps.json"
        with open(stepcount_file, "r") as f:
            number_of_files_so_far = json.load(f)

        save_increment_count = number_of_files_so_far // save_every
        # Guard against index -1 wrapping around to the final total when no
        # save interval has been completed yet.
        if save_increment_count > 0:
            steps_so_far = progress[save_increment_count - 1]
        else:
            steps_so_far = 0

        with open(stepcount_file, "w") as f:
            json.dump(steps_so_far, f)
def _run_training(self, radius, feature_combination: np.array):
    """
    Run Q learning for one radius / feature combination on the configured
    training data.

    The results directory is set to a subfolder of the global output directory
    named after the observation object, and is cleared again after training.

    :param radius: Radius handed to the ObservationObject
    :param feature_combination: Feature combination handed to the
        ObservationObject
    :raise RuntimeError if training directory not initialized
    :return:
    """
    if self.training_data_dir is None:
        raise RuntimeError("Training directory not set")

    observation = ObservationObject(radius, None, feature_combination)

    run_folder = observation.get_file_name_string()
    self.set_results_output_dir(self.global_output_dir + "/" + run_folder)

    q_train_from_games(
        self.training_data_dir,
        self.results_output_dir,
        observation,
    )

    self.clear_results_output_dir()
def main():
    """
    Train and dump regression models from data

    Fits a decision tree and a kernel regression on the observation,
    q-table and quantity arrays stored under
    ``data/qtables/<observation name>/`` and writes the models to
    ``data/regression_results``.

    :return:
    """
    obs0 = ObservationObject(0, [
        'd_closest_coin_dir',
        'd_closest_safe_field_dir',
        'me_has_bomb',
        'dead_end_detect',
        'd4_is_safe_to_move_a_l',
        'd4_is_safe_to_move_b_r',
        'd4_is_safe_to_move_c_u',
        'd4_is_safe_to_move_d_d',
        'd_best_bomb_dropping_dir',
        'd_closest_enemy_dir',
    ], None)

    # abspath() guards against a relative __file__ (e.g. ``python script.py``),
    # for which dirname() is '' and os.chdir('') raises FileNotFoundError.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))
    cwd = os.getcwd()

    # e.g. "r1_ismal_ismbr_ismcu_ismdd_bbdd_ccdir_ced_csfdir_ded_mhb"
    name = obs0.get_file_name_string()
    path = cwd + "/data/qtables/" + name + "/"

    obspath = path + "observation-" + name + ".npy"
    qpath = path + "q_table-" + name + ".npy"
    quantpath = path + "quantity-" + name + ".npy"

    outdir = "data/regression_results"
    # Passed through to the trainers; presumably a minimum observation
    # quantity - TODO confirm against train_decision_tree.
    cutoff = 20

    train_decision_tree(obspath, qpath, quantpath,
                        outdir + "/2019-03-19 17-12-25_2258.dt.p", cutoff)
    # Decision forest training is currently disabled:
    # train_decision_forest(obspath, qpath, quantpath, outdir + "/df.p", cutoff)
    train_kernel_regression(obspath, qpath, quantpath,
                            outdir + "/2019-03-19 17-12-25_2258.kr.p", cutoff)

    print()
def setup(agent):
    """
    Attach the observation configuration to the agent.

    Stores an ObservationObject built with radius 1 and the feature list
    below on ``agent.obs_object``.

    :param agent: Agent object receiving the ``obs_object`` attribute
    :return:
    """
    active_features = [
        'd_closest_coin_dir',
        'd_closest_safe_field_dir',
        'me_has_bomb',
        'dead_end_detect',
        'd4_is_safe_to_move_a_l',
        'd4_is_safe_to_move_b_r',
        'd4_is_safe_to_move_c_u',
        'd4_is_safe_to_move_d_d',
        'd_best_bomb_dropping_dir',
        # currently disabled: 'd_closest_enemy_dir', 'd_closest_crate_dir'
    ]
    agent.obs_object = ObservationObject(1, active_features, None)
def q_train_from_games_jakob(train_data,
                             write_path,
                             obs: ObservationObject,
                             a=0.5,
                             g=0.6):
    """
    Trains from all files in a directory using an existing q- and
    observation-table under write_path. If tables do not exist, creates them.

    Creates json files indexing known training files under write_path.

    Uses preconfigured ObservationObject to train.

    :param train_data: Directory containing training episodes
    :param write_path: Directory to which to output Q-learning results and
        json files.
    :param obs: Observation Object containing training settings (view radius
        etc.)
    :param a: alpha (learning rate)
    :param g: gamma (discount)
    :return: (KNOWN, QTABLE) - the observation table and the q-table
    """
    filename = obs.get_file_name_string()

    # np.save appends ".npy" when the name lacks it, np.load does not. Use the
    # same on-disk name for loading, otherwise existing tables are never found.
    suffix = "" if filename.endswith(".npy") else ".npy"
    q_table_file = write_path + '/q_table-' + filename + suffix
    observation_file = write_path + '/observation-' + filename + suffix

    try:
        QTABLE = np.load(q_table_file)
        KNOWN = np.load(observation_file)
    except (OSError, ValueError):
        # Missing or unreadable tables: start from scratch.
        print("Error loading learned q table. using empty table instead.")
        QTABLE = np.zeros([0, 6])
        KNOWN = np.zeros([0, obs.obs_length])

    # NOTE(review): np.searchsorted expects a 1-D sorted array and np.insert
    # without ``axis`` flattens its input, while KNOWN is created 2-D above -
    # confirm the intended table layout.

    training_files = [
        f for f in listdir(train_data) if isfile(join(train_data, f))
    ]
    for file in training_files:  # go through files
        try:
            if is_trained(write_path + "/records.json", file):
                print("Skipping known training datum", file, "in folder",
                      write_path)
                continue
        except IOError:
            # Records unreadable: report it and train with the file anyway.
            print("Error accessing .json records for file", file, "in folder",
                  train_data)

        try:
            game = np.load(train_data + "/" + file)
        except OSError:
            print("Skipping " + file + ". Is it a .npy file?")
            continue

        these_actions = np.zeros(4)
        # Per player: (table indices, rotations) of the previous step.
        last_index = [None, None, None, None]

        for ind, step_state in enumerate(game):
            obs.set_state(step_state)

            living_players = np.arange(4)[np.where(obs.player_locs != 0)]
            last_actions = these_actions.astype(int)

            for player in range(4):
                offset = obs.board.shape[0] + player * 21
                actions_taken = step_state[int(offset + 4):int(offset + 9)]
                actions_taken = np.append(actions_taken,
                                          step_state[int(offset + 11)])

                these_actions[player] = np.argmax(actions_taken)

                if ind != 0 and player in living_players and actions_taken[
                        np.where(actions_taken != 0)].shape[0] != 1:
                    print(
                        "Warning: Incorrect number of actions taken in one step for player index",
                        player, "in step number", ind, "in file", file)

            step_observations = obs.create_observation(living_players)

            for count, observation in enumerate(step_observations):
                player_id = living_players[count]
                findings = np.searchsorted(KNOWN, observation)

                if KNOWN.shape[0] > 0 and np.array_equal(
                        KNOWN[findings], observation):
                    # We were able to locate the observation in the table.
                    candidates, rotations_current = get_transformations(
                        observation, obs.radius,
                        obs.get_direction_sensitivity())

                    sort = np.argsort(candidates)
                    candidates = candidates[sort]
                    rotations_current = rotations_current[sort]

                    index_current = np.zeros(8)
                    index_current[7] = findings

                    # A dedicated loop variable is required here: reusing
                    # ``ind`` would clobber the step index checked below.
                    for cand_ind, cand in enumerate(candidates):
                        if cand_ind == 7:
                            continue  # last candidate is original observation
                        index_current[cand_ind] = np.searchsorted(KNOWN, cand)
                else:
                    # We were unable to find the observation: insert it and
                    # its transformations with zero-initialised q-values.
                    new_obs, rotations_current = get_transformations(
                        observation, obs.radius,
                        obs.get_direction_sensitivity())

                    sort = np.argsort(new_obs)
                    new_obs = new_obs[sort]
                    rotations_current = rotations_current[sort]

                    n_new_indices = new_obs.shape[0]
                    insertion_points = np.array(
                        [np.searchsorted(KNOWN, new_ob) for new_ob in new_obs])

                    KNOWN = np.insert(KNOWN, insertion_points, new_obs)
                    QTABLE = np.insert(
                        QTABLE, insertion_points,
                        np.zeros((n_new_indices, QTABLE.shape[1])))

                    # Each earlier insertion shifts the later ones by one.
                    index_current = np.array([
                        point + shift for (point, shift) in zip(
                            insertion_points, range(n_new_indices))
                    ])

                if ind > 0:
                    # Q-learning update of the previous step's entries.
                    prev_indices, prev_rotations = last_index[player_id]
                    for i in range(prev_indices.shape[0]):
                        l_ind, l_rot = prev_indices[i], prev_rotations[i]

                        best_choice_current_state = np.max(
                            QTABLE[int(index_current[0])])

                        row = int(l_ind)
                        col = int(l_rot[int(last_actions[player_id])])
                        QTABLE[row, col] = (1 - a) * QTABLE[row, col] + \
                            a * (get_reward(step_state, player_id) +
                                 g * best_choice_current_state)

                last_index[player_id] = (index_current, rotations_current)

        add_to_trained(write_path + "/records.json", file)  # update json table
        print("Trained with file", file)

        # Persist after every file so the tables on disk stay in step with
        # records.json if training is interrupted.
        np.save(write_path + '/observation-' + filename, KNOWN)
        np.save(write_path + '/q_table-' + filename, QTABLE)

    return KNOWN, QTABLE
def main():
    """
    Show a histogram of the stored observation quantities for three
    observation setups (radius 0, 1 and 3), one figure after the other.

    :return:
    """
    # (radius, active features) per setup.
    setups = (
        (0, [
            'd_closest_coin_dir',
            'd_closest_safe_field_dir',
            'd_best_bomb_dropping_dir',
            'me_has_bomb',
            'd4_is_safe_to_move_a_l',
            'd4_is_safe_to_move_b_r',
            'd4_is_safe_to_move_c_u',
            'd4_is_safe_to_move_d_d',
            'dead_end_detect',
        ]),
        (1, [
            'd_closest_coin_dir',
            'd_closest_safe_field_dir',
            'd_best_bomb_dropping_dir',
            'me_has_bomb',
            'd4_is_safe_to_move_a_l',
            'd4_is_safe_to_move_b_r',
            'd4_is_safe_to_move_c_u',
            'd4_is_safe_to_move_d_d',
            'dead_end_detect',
            'd_closest_enemy_dir',
        ]),
        (3, [
            'd_closest_coin_dir',
            'd_closest_safe_field_dir',
            'me_has_bomb',
            'd4_is_safe_to_move_a_l',
            'd4_is_safe_to_move_b_r',
            'd4_is_safe_to_move_c_u',
            'd4_is_safe_to_move_d_d',
        ]),
    )
    observers = [
        ObservationObject(view_radius, feature_names, None)
        for view_radius, feature_names in setups
    ]
    # (radius, number of binary features, number of directional features)
    # used only for the plot titles.
    title_info = [(0, 6, 3), (1, 6, 4), (3, 5, 2)]

    tick_size = 16
    plt.rcParams.update({'font.size': 16})

    for position, (observer, info) in enumerate(zip(observers, title_info)):
        table_name = observer.get_file_name_string()
        # The first setup is read from the folder carrying an "OLD" suffix.
        folder_suffix = "OLD/" if position == 0 else "/"
        folder = "data/qtables/" + table_name + folder_suffix

        radius, n_binary, n_directional = info
        heading = (f"Radius {radius} with {n_binary} binary features \n"
                   f"and {n_directional} directional features")

        plt.xticks(fontsize=tick_size)
        plt.yticks(fontsize=tick_size)
        plt.title(heading)

        set_quantities_histogram(folder + "quantity-" + table_name + ".npy")

        plt.xlabel("Observation frequency")
        plt.ylabel("Count")
        plt.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
        plt.show()
import json
import numpy as np
from state_functions.rewards import event_rewards
import matplotlib.pyplot as plt
from evaluation_environment import EvaluationEnvironment
from agent_code.observation_object import ObservationObject

# Observation setup whose q-table folder receives the evaluation output.
obs0 = ObservationObject(0, [
    'd_closest_coin_dir',
    'd_closest_safe_field_dir',
    'me_has_bomb',
    'dead_end_detect',
    'd4_is_safe_to_move_a_l',
    'd4_is_safe_to_move_b_r',
    'd4_is_safe_to_move_c_u',
    'd4_is_safe_to_move_d_d',
    'd_best_bomb_dropping_dir',
    'd_closest_enemy_dir',
], None)


def main():
    """
    Run evaluation trials for the "testing_only" agent, analyze the games and
    compute one summed reward per entry of the saved event counts.

    :return:
    """
    output_dir = "data/qtables/" + obs0.get_file_name_string() + "/reg_evals"
    env = EvaluationEnvironment(["testing_only"], output_dir)
    env.run_trials(add_folder=True)

    # Only the paths of the saved arrays are needed from the analysis result.
    _counts, _lengths, events_path, durations_path = env.analyze_games()
    events = np.load(events_path)
    durations = np.load(durations_path)

    # NOTE(review): neither ``durations`` nor ``rewards`` is used further in
    # the code visible here.
    rewards = []
    for player_events in events:
        rewards.append(np.sum(player_events * event_rewards))
def analyze_games(self,
                  destroy_data: bool = False,
                  print_steps_trained_with=None):
    """
    Analyze all games in self.save_directory and save the summary arrays
    ``events.npy`` and ``durations.npy`` there.

    Only the event counts of the first agent in self.agent_names are kept in
    the events summary.

    :param destroy_data: If True, delete games after analysis. Only files
        that were analyzed as games are removed.
    :param print_steps_trained_with: If not None, write the current number of
        steps the model under this filepath has trained with to
        ``current_steps.json`` in self.save_directory.
    :return: event_count_by_game, game_durations, events_path, durations_path
    """
    # Outputs of this method; never treat them as games.
    summary_files = ("events.npy", "durations.npy", "current_steps.json")

    files = [
        f for f in listdir(self.save_directory)
        if isfile(join(self.save_directory, f))
    ]

    event_count_by_game = []  # store event counts here
    game_durations = []
    analyzed_files = []  # game files actually loaded

    for file in files:
        if file in summary_files:
            continue

        # 17 columns; presumably one per event type - TODO confirm.
        eventcount = np.zeros((len(self.agent_names), 17)).astype(int)

        try:
            game = np.load(self.save_directory + "/" + file)
        except (OSError, ValueError):
            # np.load raises ValueError (not OSError) for non-npy content
            # when pickles are disallowed.
            print("Skipping " + file + ". Is it a .npy file?")
            continue
        analyzed_files.append(file)

        obs = ObservationObject(1, [], None)
        step_num = 0

        for step in game:
            obs.set_state(step)
            playersliving = False

            # track events for agents of interest
            for player in range(len(self.agent_names)):
                if player in obs.living_players or player in obs.just_died:
                    eventcount[player] += obs.events[player]
                    playersliving = True

            step_num += 1

            if not playersliving:
                break  # all players of interest are dead

        game_durations.append(step_num)
        event_count_by_game.append(eventcount)

    event_count_by_game = np.array([ec[0] for ec in event_count_by_game])
    game_durations = np.array(game_durations)

    events_path = self.save_directory + "/" + "events.npy"
    durations_path = self.save_directory + "/" + "durations.npy"

    np.save(events_path, event_count_by_game)
    np.save(durations_path, game_durations)

    if print_steps_trained_with is not None:
        steps_trained_path = self.save_directory + "/" + "current_steps.json"
        with open(steps_trained_path, "w") as f:
            json.dump(
                self.return_steps_trained_with(print_steps_trained_with), f)

    print("Wrote game info to", events_path, "and", durations_path)

    if destroy_data:  # remove files from disk
        print("Removing game data")
        # Deleting the pre-scan file list would also remove summary files
        # that existed before this call and were just rewritten above.
        for file in analyzed_files:
            remove(self.save_directory + "/" + file)

    return event_count_by_game, game_durations, events_path, durations_path
def main(data_path='data/games/four_players_esa_0_2_cratedens_0_75', train_iterations=40, train_batch_size=10 ): # ='data/games/four_players_esa_0_2_cratedens_0_75/'): """ Train an agent from the ground up and evaluate their performance every few games. Saves all files in a subdirectory of the agent's folder. Supports training + evaluation from pre-existing data (e.g. simple agents), but also training through self-play. :param data_path: If not None, take training data from here (should contain multiples of 100 games) -> Else perform self-play to learn :param train_iterations: How many training cycles to go through :param train_batch_size: How many files to train from in one cycle :return: """ obs = ObservationObject( 0, [ 'd_closest_coin_dir', 'd_closest_safe_field_dir', 'd_best_bomb_dropping_dir', #'d_closest_crate_dir', 'me_has_bomb', 'd4_is_safe_to_move_a_l', 'd4_is_safe_to_move_b_r', 'd4_is_safe_to_move_c_u', 'd4_is_safe_to_move_d_d', 'dead_end_detect', ], None) if data_path is None: create_data = True _data_path = "data/games/SELFPLAY" + obs.get_file_name_string() else: create_data = False _data_path = None evaluation_data_path = 'data/qtables/' + obs.get_file_name_string( ) + "/evaluategames" + datetime.datetime.fromtimestamp( time()).strftime('%Y-%m-%d %H-%M-%S') # where to put the evaluation games for i in range(train_iterations): iteration_str = "/ITERATION_" + str(i + 1) if create_data: data_path = _data_path + iteration_str training_setup = [('testing_only', False), ('testing_only', False), ('testing_only', False), ('testing_only', False)] print("Creating", s.n_rounds, "training episodes by playing agent against itself") main_save_states.main( training_setup, data_path ) # Important: Set settings.py n_rounds to train batch size if create_data: print("Training from", train_batch_size, "newly played games in", data_path) else: print("Training from", train_batch_size, "pre-computed games in", data_path) q_train_from_games_jakob(data_path, "data/qtables/" + 
obs.get_file_name_string(), obs, a=0.5, g=0.5, stop_after_n_files=train_batch_size, save_every_n_files=5) if i % 2 == 0: q_table_loc = "data/qtables/" + obs.get_file_name_string() iter_output = evaluation_data_path + iteration_str env = EvaluationEnvironment(["testing_only"], iter_output) print("Running", sae_s.n_rounds, "trials vs. simple agents") env.run_trials() env.analyze_games(destroy_data=False, print_steps_trained_with=q_table_loc ) # save an analysis in iter_output,