def step(self, action):
    # We are not accounting for half turns
    lookup = ["R", "L", "D", "U", "B", "F", "R'", "L'", "D'", "U'", "B'", "F'"]
    tcube = self.cube.copy()
    step_taken = pc.Formula(lookup[action])
    self.cube(step_taken)
    rwd, over = self.reward(tcube)
    return utils.flatten_1d_b(self.cube), rwd, over
def reset(self, max_scrambles):
    self.cube = pc.Cube()
    alg = pc.Formula()
    # Random arrangement
    random_alg = alg.random()
    if max_scrambles is not None:
        # Truncate the scramble to a random length below max_scrambles
        max_scrambles = np.random.choice(list(range(max_scrambles)))
        random_alg = random_alg[:max_scrambles]
    self.cube(random_alg)
    # print(utils.perc_solved_cube(self.cube) * 100)
    return utils.flatten_1d_b(self.cube)  # return the flattened state
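# Minimal sketch of what one environment transition does, using pycuber
# directly. utils.flatten_1d_b / utils.perc_solved_cube are the same helpers
# referenced above; the scramble length and random action are illustrative
# choices, not part of the original code.
import numpy as np
import pycuber as pc
import utils

lookup = ["R", "L", "D", "U", "B", "F", "R'", "L'", "D'", "U'", "B'", "F'"]

cube = pc.Cube()
cube(pc.Formula().random()[:5])           # scramble with 5 random moves
action = np.random.randint(len(lookup))   # pick one of the 12 quarter turns
cube(pc.Formula(lookup[action]))          # apply it, exactly as step() does
print(utils.perc_solved_cube(cube))       # fraction of stickers in place
print(len(utils.flatten_1d_b(cube)))      # length of the flattened state vector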
cubes = []
distance_to_solved = []
# Generate training samples: sequences of cubes up to 25 scrambles deep
for j in tqdm(range(N_SAMPLES)):
    _cubes, _distance_to_solved = gen_sequence(25)
    cubes.extend(_cubes)
    distance_to_solved.extend(_distance_to_solved)

cube_next_reward = []
flat_next_states = []
cube_flat = []
# For every sampled cube, enumerate all child states (one per action) and their rewards
for c in tqdm(cubes):
    flat_cubes, rewards = get_all_possible_actions_cube_small(c)
    cube_next_reward.append(rewards)
    flat_next_states.extend(flat_cubes)
    cube_flat.append(flatten_1d_b(c))

for _ in range(20):
    cube_target_value = []
    cube_target_policy = []

    # Evaluate all child states in one batched forward pass
    next_state_value, _ = model.predict(np.array(flat_next_states), batch_size=1024)
    next_state_value = next_state_value.ravel().tolist()
    next_state_value = list(chunker(next_state_value, size=len(action_map_small)))

    for c, rewards, values in tqdm(zip(cubes, cube_next_reward, next_state_value)):
        # One-step lookahead score: weighted reward plus predicted value of each child
        r_plus_v = 0.4 * np.array(rewards) + np.array(values)
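        # The snippet above stops mid-loop. A hedged guess at the next lines,
        # in the spirit of DeepCube-style autodidactic iteration (an assumption,
        # not the author's verbatim code): the best lookahead result becomes the
        # value target and its action index the policy target.
        target_v = np.max(r_plus_v)
        target_p = np.argmax(r_plus_v)
        cube_target_value.append(target_v)
        cube_target_policy.append(target_p)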
print("********************************************************") print("START : ") print([cube]) print("********************************************************") time.sleep(3) while run: if perc_solved_cube(cube) == 1: run = False print("Solved") break # Take the biggest Q value (= the best action) flat_cube = np.array(flatten_1d_b(cube)).reshape([1, 324]) Qs = sess.run(myAgent.output, feed_dict = {myAgent.state_in: flat_cube}) # print("Ideal : "+str(my_formula.reverse())) print("Q values:") print(Qs) # Take the biggest Q value (= the best action) choice = np.argmax(Qs) # lookup = ["R", "L","D","U","B","F","R'", "L'","D'","U'","B'","F'"] #We are not accounting for half turns acti_map = {'F': 0, 'B': 1, 'U': 2, 'D': 3, 'L': 4, 'R': 5, "F'": 6, "B'": 7, "U'": 8, "D'": 9, "L'": 10, "R'": 11} action = "" for act, val in acti_map.items(): if val == choice:
def solve(Cube):
    file_path = "auto.h5"
    model = get_model()
    model.load_weights(file_path)

    cube = Cube
    cube.score = 0
    list_sequences = [[cube]]
    existing_cubes = set()
    action_list = []
    success = False

    for j in range(50):
        # Score the last cube of every candidate sequence in one batch
        X = [flatten_1d_b(x[-1]) for x in list_sequences]
        value, policy = model.predict(np.array(X), batch_size=1024)

        new_list_sequences = []
        for x, policy in zip(list_sequences, policy):
            new_sequences = [x + [x[-1].copy()(action)] for action in action_map]  # expanded but unused here
            pred = np.argsort(policy)
            take_action = list(action_map.keys())[pred[-1]]
            # append action
            action_list.append(take_action)
            cube_1 = x[-1].copy()(list(action_map.keys())[pred[-1]])
            new_list_sequences.append(x + [cube_1])

        # print("new_list_sequences", len(new_list_sequences))

        # Re-score the extended sequences and prune the beam to the best 100
        last_states_flat = [flatten_1d_b(x[-1]) for x in new_list_sequences]
        value, _ = model.predict(np.array(last_states_flat), batch_size=1024)
        value = value.ravel().tolist()
        for x, v in zip(new_list_sequences, value):
            x[-1].score = v if str(x[-1]) not in existing_cubes else -1

        new_list_sequences.sort(key=lambda x: x[-1].score, reverse=True)
        new_list_sequences = new_list_sequences[:100]
        existing_cubes.update(set([str(x[-1]) for x in new_list_sequences]))
        list_sequences = new_list_sequences

        list_sequences.sort(key=lambda x: perc_solved_cube(x[-1]), reverse=True)
        prec = perc_solved_cube(list_sequences[0][-1])
        if prec == 1:
            success = True
            break

    return success, action_list
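# Usage sketch (assumptions: the trained weights "auto.h5" sit next to this
# script, pycuber is imported as pc as in the other snippets, and the 8-move
# scramble length is arbitrary). Note that action_list collects the top move of
# every candidate sequence at every step, so it is a search trace rather than a
# clean solution sequence.
if __name__ == "__main__":
    scrambled = pc.Cube()
    scrambled(pc.Formula().random()[:8])
    success, moves = solve(scrambled)
    print("solved:", success)
    print("search trace length:", len(moves))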
model = get_model()
model.load_weights(file_path)

sample_X, sample_Y, cubes = gen_sample(10)
cube = cubes[0]
cube.score = 0
list_sequences = [[cube]]
existing_cubes = set()

for j in range(1000):
    X = [flatten_1d_b(x[-1]) for x in list_sequences]
    value, policy = model.predict(np.array(X), batch_size=1024)

    new_list_sequences = []
    for x, policy in zip(list_sequences, policy):
        new_sequences = [x + [x[-1].copy()(action)] for action in action_map]
        pred = np.argsort(policy)
        # Keep the two most promising moves according to the policy head
        cube_1 = x[-1].copy()(list(action_map.keys())[pred[-1]])
        cube_2 = x[-1].copy()(list(action_map.keys())[pred[-2]])
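        # The excerpt stops here; presumably both candidate continuations are
        # kept before pruning, mirroring the top-1 expansion in solve() above
        # (an assumption, not the original code):
        new_list_sequences.append(x + [cube_1])
        new_list_sequences.append(x + [cube_2])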
# print(list_sequences)
preview_cube = cube  # show cube before solving
# print([preview_cube])
# print("start solve......")

# start solving with X steps
for j in range(50):
    # print("step: {}".format(j + 1))
    X = [flatten_1d_b(x[-1]) for x in list_sequences]
    value, policy = model.predict(np.array(X), batch_size=1024)

    new_list_sequences = []
    for x, policy in zip(list_sequences, policy):
        new_sequences = [x + [x[-1].copy()(action)] for action in action_map]
        pred = np.argsort(policy)
        take_action = list(action_map.keys())[pred[-1]]
import pycuber as pc

import utils

# https://github.com/adrianliaw/PyCuber/blob/version2/examples/sample_program.py

# Declare a cube object
mycube = pc.Cube()
st = utils.flatten_1d_b(mycube)
print(st)

# k = ["R", "S"]
# # A Formula is a sequence of moves
# my_formula = pc.Formula(k[0])
# mycube(my_formula)
# print(mycube)
# # Reversing the moves
# my_formula.reverse()

# A Formula object to hold an algorithm
alg = pc.Formula()
# Random arrangement
random_alg = alg.random()
# random_alg = random_alg[:10]
# print(random_alg)
mycube(random_alg)
print(utils.perc_solved_cube(mycube))
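# Reversing the scramble restores the solved state, a quick sanity check that
# perc_solved_cube reports 1.0 on a solved cube (Formula.reverse is the same
# call referenced in the commented-out block above):
random_alg.reverse()
mycube(random_alg)
print(utils.perc_solved_cube(mycube))  # expected: 1.0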