def reproduce(self):
    """
    Create the two children of this node and assign regions (i.e. hypercubes) to them.

    If this node holds exactly one hypercube, that hypercube is first split into
    2**dimension equally sized hypercubes of half the side length; otherwise the
    hypercubes already held by the node are used as they are. In both cases the
    first half of the resulting list goes to the first child and the remainder to
    the second child.

    :return: A list of the two new nodes, each created at depth ``self.h + 1``
             with this node as its parent.
    """
    if len(self.hypercube_list) == 1:
        parent_cube = self.hypercube_list[0]
        new_hypercube_length = parent_cube.length / 2
        # Each new center lies half of the new side length away from the old
        # center along every dimension.
        offset = new_hypercube_length / 2
        old_center = parent_cube.center

        regions = []
        for i in range(2 ** self.dimension):
            # The zero-padded binary digits of i pick one corner: '1' shifts the
            # center in the positive direction of that dimension, '0' in the
            # negative direction.
            bits = bin(i)[2:].zfill(self.dimension)
            # The builtin float replaces the np.float alias, which was removed
            # from NumPy in release 1.24.
            center_translation = np.fromiter(
                (offset if bit == '1' else -offset for bit in bits),
                dtype=float)
            regions.append(
                Hypercube(new_hypercube_length, old_center + center_translation))
    else:
        regions = self.hypercube_list

    half = len(regions) // 2
    return [
        UcbNode(self, self.h + 1, regions[:half]),
        UcbNode(self, self.h + 1, regions[half:]),
    ]
def calculate_m(self, current_segment):
    """
    Score every backward neighbor of ``current_segment`` and pick the best score.

    For each neighbor, its distance is added to ``self.calculate_s`` evaluated on a
    column that holds, per axis, the sequence symbol at the current index where
    ``Hypercube.compare_indices`` flags the axis, and ``'-'`` elsewhere.

    :param current_segment: The segment whose backward neighbors are evaluated.
    :return: ``None`` if there are no backward neighbors, otherwise a tuple
             ``(best_score, indices)`` where ``indices`` lists the index of every
             neighbor that reached ``best_score``.
    """
    candidates = {}
    for neighbor in self.hypercube.backward_neighbors(current_segment):
        # calculate M + S for each neighbor
        base_score = neighbor.get_distance()
        changed = Hypercube.compare_indices(current_segment.index, neighbor.index)
        column = [
            self.hypercube.sequences[axis][current_segment.index[axis]] if moved else '-'
            for axis, moved in enumerate(changed)
        ]
        total = base_score + self.calculate_s(column)
        # Neighbors with equal totals are grouped so all of them can be backtracked.
        candidates.setdefault(total, []).append(neighbor.index)

    if not candidates:
        return None

    best = max(candidates)
    return (best, candidates[best])
def generate_output(self, traceback_path):
    """
    Build one output sequence per dimension by walking ``traceback_path``.

    Starting from the lengths in ``self.hypercube.dimensions``, each step of the
    path is compared with the previous one via ``Hypercube.compare_indices``. For
    every axis flagged by that comparison the axis position is decremented and the
    symbol at the new position is prepended (or ``'-'`` once the position has
    dropped below zero); every other axis gets ``'-'`` prepended.

    :param traceback_path: Indexable sequence of steps; the first entry is the
                           starting point and produces no output on its own.
    :return: A list with ``self.hypercube.dimension_count`` lists of symbols.
    """
    cube = self.hypercube
    aligned = [[] for _ in range(cube.dimension_count)]
    positions = list(cube.dimensions)

    last_step = traceback_path[0]
    for step in traceback_path[1:]:
        changed = Hypercube.compare_indices(last_step, step)
        last_step = tuple(step)
        for axis, moved in enumerate(changed):
            symbol = '-'
            if moved:
                positions[axis] -= 1
                if positions[axis] >= 0:
                    symbol = cube.sequences[axis][positions[axis]]
            # Symbols are prepended, so the finished lists read in the opposite
            # order to the one in which the path is walked.
            aligned[axis].insert(0, symbol)
    return aligned
use_saved_data = True # when True, the script simply plots the data of the most recently ran simulation, if available # this means that no simulations are run when True. available_arms_mean = 50 num_times_to_run = 10 num_rounds = 50000 num_std_to_show = 5 budgets = [2, 4] line_style_dict = {2: '-', 4: ':'} v1 = np.sqrt(5) v2 = 1 rho = 0.5 N = 2 # changing this HAS NO EFFECT and the ACC-UCB class will not work when N != 2 root_context = Hypercube(1, np.array([0.5, 0.5 ])) # this is called x_{0,1} in the paper def run_one_try(problem_model, num_run, budget): random_algo = Random(problem_model, budget) bench_algo = Benchmark(problem_model, budget) cc_mab_algo = CCMAB(problem_model, budget, root_context.get_dimension()) cc_ucb_algo = ACCUCB(problem_model, v1, v2, N, rho, budget, root_context) ucb_reward, ucb_regret = cc_ucb_algo.run_algorithm() bench_reward, bench_regret = bench_algo.run_algorithm() random_reward, random_regret = random_algo.run_algorithm() mab_reward, mab_regret = cc_mab_algo.run_algorithm() print("Run done: " + str(num_run))