def solve_with_info(self):
    # Scramble a cube, then let the network attempt to solve it while
    # recording per-action statistics.
    cube, get_gen_trajectory_act_list = self.generator.generate_cube_with_info(
        self.move_depth)
    trajectory = []
    self.gen_trajectory_act_list += get_gen_trajectory_act_list
    for _ in range(self.move_depth):
        action = self.get_action(
            torch.from_numpy(one_hot_code(cube)).to(self.device))
        cube(action)
        trajectory.append(action)
        self.act_occ_list[ACTIONS.index(action)] += 1
        if cube == SOLVED_CUBE:
            self.win_counter += 1
            # Count how often each action appears in winning trajectories.
            for act in trajectory:
                self.win_act_occ_list[ACTIONS.index(act)] += 1
            break
def solve(self):
    # Scramble a cube and give the network move_depth moves to solve it.
    cube = self.generator.generate_cube(self.move_depth)
    for _ in range(self.move_depth):
        cube(self.get_action(
            torch.from_numpy(one_hot_code(cube)).to(self.device)))
        if cube == SOLVED_CUBE:
            self.win_counter += 1
            break
def max_mover_solve(self, number_of_tests=1000, modifier=10):
    # Like solve(), but allow up to move_depth * modifier moves before giving up.
    # NOTE: number_of_tests is currently unused inside this method.
    cube = self.generator.generate_cube(self.move_depth)
    for _ in range(self.move_depth * modifier):
        cube(self.get_action(
            torch.from_numpy(one_hot_code(cube)).to(self.device)))
        if cube == SOLVED_CUBE:
            self.win_counter += 1
            break
def visual(self):
    # Show the scrambled cube, then print the cube after each solving move.
    cube = self.generator.generate_cube(self.move_depth)
    print(chr(27) + "[1J")  # ANSI escape sequence: clear the screen above the cursor.
    print(f"Shuffled cube at depth level {self.move_depth}:\n{repr(cube)}")
    time.sleep(2)
    print("Solving cube")
    for _ in range(self.move_depth):
        print(repr(cube(self.get_action(
            torch.from_numpy(one_hot_code(cube)).to(self.device)))))
        time.sleep(1)
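
# Hypothetical usage sketch (assumption, not from the original source): if the
# methods above belong to a class named, say, `Solver` that exposes `move_depth`
# and `win_counter`, an evaluation run could look like this:
#
#     solver = Solver(move_depth=5)
#     number_of_tests = 1000
#     solver.win_counter = 0
#     for _ in range(number_of_tests):
#         solver.solve()
#     print(f"Solved {solver.win_counter}/{number_of_tests} scrambles "
#           f"at depth {solver.move_depth}")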
memory.new_full_buffer(replay_shuffle_range)
while time < epoch_time:
    # REPLAY
    if replay_time > 0 or np.random.random() <= replay_chance:
        # Get a random cube and the reverse of the actions that led to it.
        cube, reverse_actions = memory.generate_random_cube()
        reverse_actions = reverse_actions[:len(reverse_actions) - stop]
        depth = len(reverse_actions)
        if self.adam_optim is not None:
            for i in range(depth):
                input = torch.from_numpy(one_hot_code(cube)).to(self.device)
                act, table_online = self.get_best_act_array(input, Network.Online)
                val_online = table_online[act]
                # TODO: convert input, network, act -> input, act, network
                val_target = self.get_val(input, act, Network.Target)
                correct_act = reverse_actions[depth - i - 1]
                reward, reward_vector = self.experience_reward(
                    ACTIONS[act], correct_act)
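                # Hypothetical continuation (assumption, not from the original
                # source): a typical next step would combine `reward` (or
                # `reward_vector`) with `val_target` to form a learning target,
                # compare it against `val_online` with a loss such as
                # torch.nn.functional.smooth_l1_loss, then zero the gradients,
                # backpropagate, and step `self.adam_optim`.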