class Optimizer:
    """Drives the reinforcement-learning loop for the block-relocation solver.

    Couples a value network and a policy network with a tree search:
    generates training data by solving randomly created instances,
    tracks prediction deviations, and periodically (re)trains both
    networks from a replay buffer or from CSV data.
    """

    def __init__(self):
        configs = load_configs("Configs.json")
        value_configs = load_configs("Configs_ValueNN.json")
        policy_configs = load_configs("Configs_PolicyNN.json")

        # Problem geometry and data sinks.
        self.width = configs["width"]
        self.height = configs["height"]
        self.buffer_size = configs["buffer_size"]
        self.filename = configs["data_filename"]
        self.experiment_name = "Deviations/" + configs["val_filename"]

        self.buffer = Buffer(self.buffer_size)
        self.env = BlockRelocation(self.height, self.width)

        self.value_net = ValueNetworkKeras(value_configs)
        self.policy_net = PolicyNetworkKeras(policy_configs)
        self.tree_searcher = TreeSearch(
            self.value_net, BlockRelocation(self.height, self.width), self.policy_net)
        # Per-value mean prediction deviations persisted across runs and
        # consumed by the tree search (see calculate_deviations).
        self.tree_searcher.std_vals = load_obj(self.experiment_name)

        # Search hyper-parameters: a conservative baseline and the looser
        # set currently in use; DFS variants trade quality for speed.
        self.baseline_params = {"search_depth": 5, "epsilon": 0.1, "threshold": 0.01,
                                "drop_percent": 0.25, "factor": 0.01}
        self.current_search_params = {"search_depth": 5, "epsilon": 0.1, "threshold": 0.05,
                                      "drop_percent": 0.3, "factor": 0.05}
        self.dfs_params_hq = {"stop_param": 4, "k": 12}
        self.dfs_params_fast = {"stop_param": 1, "k": 12}

    def create_training_example(self, permutations=True, units=8):
        """Create one solved instance and return (DataFrame, solution length).

        A random instance with ``units`` containers is generated and solved
        by the tree search. If the solver fails (empty path, or a TypeError
        while replaying the path), a fresh instance is drawn and the attempt
        repeats. NOTE: the original retried via recursion, which could hit
        the recursion limit on repeatedly unsolvable instances; a loop is
        behaviorally identical on success but cannot overflow the stack.
        """
        while True:
            if units < self.height * self.width:
                matrix = self.env.create_instance_random(units)
            else:
                matrix = self.env.create_instance(self.height, self.width)

            # Small instances use the exact solver, larger ones DFS.
            if units < 6:
                path = self.tree_searcher.find_path_2(matrix.copy())
            else:
                path = self.tree_searcher.find_path_dfs(matrix.copy())

            # Solver could not solve it with the given depth — try again
            # with a newly generated instance.
            if not path:
                continue
            try:
                data = self.tree_searcher.move_along_path(matrix.copy(), path)
                break
            except TypeError:
                continue

        if permutations:
            data = self.create_permutations(data)
        else:
            data = self.prepare_data_for_model(data)
        return data, len(path)

    def train_on_new_instances(self, num=1, units=10, perm=False, train=False):
        """Generate ``num`` solved instances, update deviation statistics,
        optionally train both networks on a third of the data, and append
        everything to the replay buffer.
        """
        data_list = []
        step_list = []
        for ii in range(num):
            data, steps = self.create_training_example(permutations=perm, units=units)
            data_list.append(data)
            step_list.append(steps)

        data = pd.concat(data_list, ignore_index=True, sort=False)
        self.calculate_deviations(data)

        if train:
            # Train on a random third of the fresh data only; the rest is
            # seen later via the buffer.
            train_data = data.sample(int(data.shape[0] / 3))
            self.value_net.train_df(train_data, epochs=2, validation=False)
            self.policy_net.train_df(train_data, epochs=2, validation=False)

        self.buffer.append(data)

    def calculate_deviations(self, data):
        """Update and persist the running mean absolute deviation of the
        value network's predictions, grouped by true Value.

        New per-value means are blended 50/50 with the stored ones; unseen
        values are inserted as-is. The merged dict is written back to
        ``self.tree_searcher.std_vals`` and saved to disk. Returns the
        merged dict.
        """
        data = data.copy()
        data["pred"] = list(self.value_net.predict_df(data))
        data["pred"] = data["pred"].apply(lambda x: x[0])
        data["Value"] = data["Value"].astype('int64')
        data["deviation"] = abs(data["pred"] - data["Value"])

        deviations = data.groupby("Value")["deviation"].mean()
        new_vals = deviations.to_dict()

        old_vals = self.tree_searcher.std_vals
        for key in new_vals.keys():
            if key not in old_vals:
                old_vals[key] = new_vals[key]
            else:
                # Exponential-style smoothing: average old and new means.
                old_vals[key] = (old_vals[key] + new_vals[key]) / 2

        self.tree_searcher.std_vals = old_vals
        save_obj(old_vals, self.experiment_name)
        return old_vals

    def test_deviations(self):
        """Smoke-test: solve 20 instances of 9 units and print the updated
        deviation statistics for each."""
        for i in range(20):
            data, steps = self.create_training_example(units=9)
            print(steps)
            print("DONE")
            print(self.calculate_deviations(data))

    def prepare_data_for_model(self, data):
        """Flatten state matrices and one-hot-encode moves, in place.

        Expects a 3-column frame (moves, state matrix, value); columns are
        renamed positionally to ["Moves", "StateRepresentation", "Value"].
        Matrices are transposed then flattened (column-major layout).
        """
        # TODO DONT WANT TO CHANGE THE COLUMN NAME HERE
        data["StateRepresentation"] = data["StateRepresentation"].apply(
            lambda x: x.transpose().flatten())
        data.columns = ["Moves", "StateRepresentation", "Value"]
        data["MovesEncoded"] = data["Moves"].copy()
        data["MovesEncoded"] = data["MovesEncoded"].apply(
            lambda x: self.tree_searcher.move_to_hot_one_encoding(x))
        return data

    def create_permutations(self, df):
        """Expand each (state, move, value) row into all column-permuted
        equivalents, dropping duplicate states, and return one DataFrame.
        """
        df_list = []
        for i, row in df.iterrows():
            # creating representations
            rep = self.env.all_permutations_state(row.StateRepresentation)
            rep = list(rep)

            # creating value column
            val = [np.array(row.Value) for _ in range(len(rep))]

            # creating move and move_encoded columns
            moves = self.env.all_permutations_move(*row.Move)
            encoded = [self.tree_searcher.move_to_hot_one_encoding(m) for m in moves]

            temp_df = pd.DataFrame({
                "StateRepresentation": rep,
                "Value": val,
                "Moves": moves,
                "MovesEncoded": encoded
            })

            # Removing duplicates via a hashable byte representation.
            # tobytes() replaces the removed-in-NumPy-2.0 alias tostring().
            temp_df["hashable_state"] = temp_df.StateRepresentation.apply(
                lambda x: x.tobytes())
            temp_df = temp_df.drop_duplicates(subset="hashable_state")
            temp_df = temp_df.drop(columns="hashable_state")

            df_list.append(temp_df)

        final_df = pd.concat(df_list, ignore_index=True)
        return final_df

    def reinforce(self, iterations=20, units=12, instances=200, train=False):
        """Run ``iterations`` rounds of train_on_new_instances and append
        the wall-clock duration to duration.txt."""
        print("Starting reinforce with {} iterations and {} units.".format(
            iterations, units))
        start = time.time()
        for x in range(iterations):
            print("Iteration " + str(x + 1))
            self.train_on_new_instances(instances, units=units, train=train)
        end = time.time()
        print(end - start)
        with open("duration.txt", 'a+') as f:
            f.write(str(units) + " " + str(iterations * instances) + " "
                    + str(end - start) + "\n")

    def train_and_update_models(self, epochs=20):
        """Train both networks on a full-size sample from the buffer."""
        data = self.buffer.get_sample(size=self.buffer.max_size)
        print("Training Policy Network ...")
        self.policy_net.train_df(data, epochs=epochs, validation=False)
        print("Training Value Network ...")
        self.value_net.train_df(data, epochs=epochs, validation=False)

    def train_on_csv(self, filename):
        """Load training data from CSV, deduplicate by state, train both
        networks for 5 rounds and print held-out evaluation statistics.
        """
        data = pd.read_csv(filename)
        print(data.shape)

        # Columns were serialized as "[1 2 3]" strings; parse them back.
        data["StateRepresentation"] = data["StateRepresentation"].apply(
            lambda x: np.fromstring(x[1:-1], sep=" "))
        data["MovesEncoded"] = data["MovesEncoded"].apply(
            lambda x: np.fromstring(x[1:-1], sep=" "))

        # tobytes() replaces the removed-in-NumPy-2.0 alias tostring().
        data["hashed"] = data["StateRepresentation"].apply(lambda s: s.tobytes())
        data = data.drop_duplicates(subset="hashed")
        data = data.drop(columns=["hashed"])
        data = data.reset_index(drop=True)
        print(data.shape)

        train_data, test_data = train_test_split(data, shuffle=True, test_size=0.1)

        for i in range(5):
            print("Currently on run {} of training.".format(i + 1))
            self.policy_net.train_df(train_data)
            self.value_net.train_df(train_data)
            # BUG FIX: labels and evaluations were swapped — the policy
            # statistics line printed the value network's eval and vice versa.
            print("Policy Network Statistics:")
            print(self.policy_net.eval(test_data))
            print("Value Network Statistics:")
            print(self.value_net.eval(test_data))
        print("Training finished!")

    def full_experiment(self):
        """Full curriculum: warm up on small instances, then scale the unit
        count up to a full board, retraining and growing the buffer."""
        total_container = self.width * self.height

        # Warm-up phase on tiny instances.
        for ii in range(5):
            self.reinforce(iterations=5, units=5, instances=200, train=True)
            self.train_and_update_models()

        # Curriculum phase: increase the number of units step by step.
        for ii in range(6, total_container - 5):
            print("Training: Currently training on {} units.".format(ii))
            self.reinforce(iterations=20, units=ii, instances=2000)
            self.buffer.remove_duplicates()

            if ii % 3 == 0:
                # Every third step: full retrain from scratch and grow buffer.
                data = self.buffer.get_sample(self.buffer.max_size, remove=True)
                self.policy_net.retrain_model(data)
                self.value_net.retrain_model(data)
                self.buffer.increase_max_size(0.1)
                del data
            else:
                self.train_and_update_models()

        # Near-full boards: always retrain on the whole buffer.
        for ii in range(total_container - 5, total_container + 1):
            print("Training: Currently training on {} units.".format(ii))
            self.reinforce(iterations=20, units=ii, instances=2000)
            self.buffer.remove_duplicates()
            data = self.buffer.storage
            self.policy_net.retrain_model(data)
            self.value_net.retrain_model(data)
            self.buffer.increase_max_size(0.1)

        # Final phase on full boards, benchmarking each iteration.
        for ii in range(10):
            print("Training with all units. Currently on iteration ", str(ii + 1))
            bm = Benchmark()
            bm.benchmark_caserta()
            self.reinforce(iterations=10, units=self.height * self.width,
                           instances=2000)
            self.buffer.remove_duplicates()
            self.train_and_update_models()

        # find best parameters
        # run experiment on test instances

    def produce_testing_data(self, filename, examples=10000, perm=False):
        """Generate ``examples`` solved full-board instances and append them
        to ``filename`` as CSV, flushing every 500 examples.

        The header is written only when the file does not exist yet at the
        time of the first flush.
        """
        data_list = []
        start = time.time()
        for e in range(examples):
            if e % 500 == 0 and e > 0:
                h = not os.path.isfile(filename)
                final_df = pd.concat(data_list)
                end = time.time()
                with open(filename, 'a+') as f:
                    final_df.to_csv(f, header=h, index=False)
                print(end - start)
                start = time.time()
                data_list = []

            data, length = self.create_training_example(
                permutations=perm, units=self.height * self.width)
            data_list.append(data)

        # in case number is not divisible by 500
        final_df = pd.concat(data_list)
        with open(filename, 'a') as f:
            final_df.to_csv(f, header=False, index=False)