def __init__(self, tiles, board, strategy='max_depth'):
    """Store the starting tiles/board and build the helpers used by the search.

    Args:
        tiles: collection of tiles; only ``len(tiles)`` is read here, the
            object itself is stored and handed to ``State``.
        board: starting board; handed to ``State`` and measured with
            ``get_rows`` / ``get_cols``.
        strategy: stored unchanged on ``self.strategy``
            (defaults to ``'max_depth'``).
    """
    self.initial_tiles = tiles
    self.initial_board = board
    self.state = State(board, tiles)

    # NOTE(review): the checker receives (n, rows, cols) here; confirm this
    # order against SolutionChecker's signature.
    n_tiles = len(tiles)
    n_rows = get_rows(board)
    n_cols = get_cols(board)
    self.solution_checker = SolutionChecker(n_tiles, n_rows, n_cols)

    self.strategy = strategy
    self.n_tiles_placed = 0

    # Bookkeeping kept "for MCTS vis purposes" (wording of the original note).
    self.solution_tiles_order = []
    self.colorful_states = True
def __init__(self, tiles, board):
    """Create empty search caches and the initial state for ``tiles``/``board``.

    Args:
        tiles: collection of tiles; only ``len(tiles)`` is read here, the
            object itself is stored and handed to ``State``.
        board: starting board; handed to ``State`` and measured with
            ``get_rows`` / ``get_cols``.
    """
    # Per-edge statistics, keyed by (s, a).
    self.Qsa = {}  # Q values for s,a (as defined in the paper)
    self.Nsa = {}  # number of times edge s,a was visited

    # Per-board statistics, keyed by s.
    self.Ns = {}  # number of times board s was visited
    self.Ps = {}  # initial policy (returned by neural net)
    self.Es = {}  # game.getGameEnded result for board s
    self.Vs = {}  # game.getValidMoves result for board s

    self.initial_tiles = tiles
    self.initial_board = board
    self.state = State(board, tiles)

    # NOTE(review): the checker receives (n, rows, cols) here; confirm this
    # order against SolutionChecker's signature.
    n_tiles = len(tiles)
    n_rows = get_rows(board)
    n_cols = get_cols(board)
    self.solution_checker = SolutionChecker(n_tiles, n_rows, n_cols)
def test_check_imperfect_solution_count_files_2(self):
    """``get_reward(..., count_tiles=True)`` is ``1 / n`` for this ordering.

    The instance is written out by hand (its last row repeats the first),
    so no DataGenerator is needed to set the test up.
    """
    n = 4
    cols = 10
    rows = 5
    some_instance_np_array = np.array([
        [1, 10, 1],
        [6, 10, 2],
        [2, 10, 3],
        [1, 10, 4],
        [1, 10, 1],
    ])
    solution_checker = SolutionChecker(n, cols, rows)
    self.assertEqual(
        solution_checker.get_reward(some_instance_np_array, count_tiles=True),
        1 / n,
    )
def test_check_imperfect_solution(self):
    """``get_reward`` is ``(10 * 5) / (cols * rows)`` for this ordering.

    The instance is written out by hand (its last row repeats the first),
    so no DataGenerator is needed to set the test up.
    """
    n = 4
    cols = 10
    rows = 5
    some_instance_np_array = np.array([
        [1, 10, 1],
        [2, 10, 2],
        [1, 10, 3],
        [5, 10, 4],
        [1, 10, 1],
    ])
    solution_checker = SolutionChecker(n, cols, rows)
    self.assertEqual(
        solution_checker.get_reward(some_instance_np_array),
        (10 * 5) / (cols * rows),
    )
def test_check_perfect_solution(self):
    """A generated instance, ordered by its third field, must score 0."""
    n, w, h = 20, 40, 40
    dg = DataGenerator()
    some_instance_visual = dg.gen_instance_visual(n, w, h)

    # Order the entries by the first two components of their third field.
    perfect_bin_configuration = list(some_instance_visual)
    perfect_bin_configuration.sort(key=lambda entry: (entry[2][0], entry[2][1]))

    # NOTE(review): the converted array is never used below; the call is kept
    # because its effect on the instance is not visible from here.
    dg._transform_instance_visual_to_np_array(some_instance_visual)

    solution_checker = SolutionChecker(n, h, w)
    reward = solution_checker.get_reward(np.array(perfect_bin_configuration))
    self.assertEqual(reward, 0)
def test_check_imperfect_solution_count_tiles(self):
    """``get_reward(..., count_tiles=True)`` is ``1 / n`` for this ordering.

    The instance is written out by hand, so the DataGenerator and the
    generated instance the test used to build (and never read) are dropped.
    """
    n = 4
    cols = 10
    rows = 5
    # NOTE: first bin always repeated
    some_instance_np_array = np.array([
        [1, 10, 1],
        [2, 10, 2],
        [1, 10, 3],
        [5, 10, 4],
        [1, 10, 1],
    ])
    solution_checker = SolutionChecker(n, cols, rows)
    self.assertEqual(
        solution_checker.get_reward(some_instance_np_array, count_tiles=True),
        1 / n,
    )
def build_reward(self):
    """Build the reward op for the sampled tours and register its summary.

    Reorders the input according to ``self.tour`` and scores the reordered
    batch with ``SolutionChecker.get_rewards`` wrapped in ``tf.py_func``.

    Sets ``self.permutations``, ``self.ordered_input_`` and ``self.reward``,
    and adds a ``'reward_mean'`` scalar summary.
    """
    # Repeat each batch index n + 1 times so it can be paired with the tour
    # entries; stacking on axis 2 yields the index pairs used by gather_nd.
    # Assumes self.tour is (batch_size, n + 1) — TODO confirm.
    batch_indices = tf.tile(
        tf.expand_dims(tf.range(self.batch_size, dtype=tf.int32), 1),
        [1, self.n + 1],
    )
    self.permutations = tf.stack([batch_indices, self.tour], 2)

    # Explicit comparison kept from the original: the type of is_training
    # is not visible from this block.
    if self.is_training == True:
        gather_source = self.input_
    else:
        # Outside training the input is tiled batch_size times along axis 0
        # before gathering.
        gather_source = tf.tile(self.input_, [self.batch_size, 1, 1])
    self.ordered_input_ = tf.gather_nd(gather_source, self.permutations)

    solution_checker = SolutionChecker(self.n, self.w, self.h)
    # No tf.Session is created here: the original opened one that was never
    # used or closed. Building the graph does not need a session.
    rewards = tf.py_func(
        solution_checker.get_rewards,
        [
            self.ordered_input_,
            self.count_non_placed_tiles,
            self.combinatorial_reward,
        ],
        tf.float32,
    )
    self.reward = rewards
    tf.summary.scalar('reward_mean', tf.reduce_mean(rewards))
def test_bin_outside_border(self):
    """With a single LFB at (40, 11), a (10, 10) bin is inside, (12, 10) is not."""
    n, h, w = 20, 50, 50
    checker = SolutionChecker(n, h, w)

    # Sketch kept from the original (spacing approximated):
    #
    # 11  -------------
    #     |           |
    #     |           |          |
    #     |           |          |
    #     -----------------------|
    #                 40
    checker.LFBs = SortedKeyList([], key=lambda lfb: (lfb[1], lfb[0]))
    checker.LFBs.add((40, 11))

    # Presumably 40 + 10 still fits the 50-wide board while 40 + 12 does
    # not — confirm against is_bin_outside_borders.
    fitting_bin = (10, 10)
    self.assertFalse(checker.is_bin_outside_borders(fitting_bin))

    overflowing_bin = (12, 10)
    self.assertTrue(checker.is_bin_outside_borders(overflowing_bin))
# Run the actor on 10 generated test instances and visualise each best tour.
for i in tqdm(range(10)):
    # Drawn on every iteration but not passed on: train_batch below is
    # seeded with the loop index instead.
    seed_ = 1 + random.randint(0, 10000)

    dg = DataGenerator()
    input_batch = dg.train_batch(
        1,
        actor.n,
        actor.w,
        actor.h,
        actor.dimension,
        seed=i,
        freeze_first_batch=config.freeze_first_batch,
    )

    # Sample tours and their rewards for this instance.
    feed = {actor.input_: input_batch}
    tour, reward = sess.run([actor.tour, actor.reward], feed_dict=feed)

    # The best solution is the sampled tour with the lowest reward.
    j = np.argmin(reward)
    best_permutation = tour[j][:-1]  # the final tour entry is dropped
    predictions_length.append(reward[j])

    solution_checker = SolutionChecker(actor.n, actor.w, actor.h)
    # TODO: find how this is called in numpy (sort by index)
    bins = [input_batch[0][el] for el in best_permutation]
    solution_checker.get_reward(bins)
    grid = solution_checker.grid
    print('reward', reward[j])
    solution_checker.visualize_grid()

    # dataset.visualize_2D_trip(input_batch[0][best_permutation])
    # dataset.visualize_sampling(tour)
    # dataset.visualize_2D_trip(opt_tour)