Example #1
    def step(self, action):
        # The 12 quarter-turn moves; half turns (R2, U2, ...) are not included
        lookup = [
            "R", "L", "D", "U", "B", "F", "R'", "L'", "D'", "U'", "B'", "F'"
        ]

        # Keep a copy of the pre-move state for the reward computation
        tcube = self.cube.copy()

        step_taken = pc.Formula(lookup[action])

        # Calling a pycuber Cube with a Formula applies the moves in place
        self.cube(step_taken)

        rwd, over = self.reward(tcube)

        return utils.flatten_1d_b(self.cube), rwd, over
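A minimal usage sketch for the environment above, assuming step and the reset method shown in Example #2 live on a CubeEnv class (the class name and wiring are assumptions, not the original code):

import numpy as np

env = CubeEnv()
state = env.reset(max_scrambles=5)     # start from a lightly scrambled cube
for _ in range(100):                   # cap the episode length
    action = np.random.randint(12)     # one of the 12 quarter-turn moves
    state, reward, done = env.step(action)
    if done:
        break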
Example #2
    def reset(self, max_scrambles):
        self.cube = pc.Cube()
        alg = pc.Formula()
        # Random scramble sequence
        random_alg = alg.random()

        if max_scrambles is not None:
            # Truncate to a random number of scramble moves
            max_scrambles = np.random.choice(list(range(max_scrambles)))
            random_alg = random_alg[:max_scrambles]
        self.cube(random_alg)
        # print(utils.perc_solved_cube(self.cube)*100)
        return utils.flatten_1d_b(self.cube)  # return the flattened state
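utils.flatten_1d_b is used throughout but not shown in these snippets; a plausible sketch consistent with the 324-dimensional state used later (54 stickers, one-hot over 6 colours; the face and colour orders are assumptions):

import numpy as np

COLOURS = ["red", "yellow", "green", "white", "orange", "blue"]  # assumed order

def flatten_1d_b(cube):
    # One-hot encode each of the 54 sticker colours: 54 * 6 = 324 values
    flat = []
    for face in "LUFDRB":                  # assumed face order
        for row in cube.get_face(face):    # each face is a 3x3 grid of Squares
            for square in row:
                one_hot = [0] * 6
                one_hot[COLOURS.index(square.colour)] = 1
                flat.extend(one_hot)
    return np.array(flat)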
Example #3
        cubes = []
        distance_to_solved = []
        # Build a dataset of scrambled cubes and their scramble depths
        for j in tqdm(range(N_SAMPLES)):
            _cubes, _distance_to_solved = gen_sequence(25)
            cubes.extend(_cubes)
            distance_to_solved.extend(_distance_to_solved)

        cube_next_reward = []
        flat_next_states = []
        cube_flat = []

        # For each cube, enumerate all one-move successors and their rewards
        for c in tqdm(cubes):
            flat_cubes, rewards = get_all_possible_actions_cube_small(c)
            cube_next_reward.append(rewards)
            flat_next_states.extend(flat_cubes)
            cube_flat.append(flatten_1d_b(c))

        # Refine value/policy targets over several passes through the dataset
        for _ in range(20):

            cube_target_value = []
            cube_target_policy = []

            next_state_value, _ = model.predict(np.array(flat_next_states),
                                                batch_size=1024)
            next_state_value = next_state_value.ravel().tolist()
            next_state_value = list(
                chunker(next_state_value, size=len(action_map_small)))

            for c, rewards, values in tqdm(
                    zip(cubes, cube_next_reward, next_state_value)):
                # Combine the scaled immediate reward with the predicted next-state value
                r_plus_v = 0.4 * np.array(rewards) + np.array(values)
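chunker is referenced above but not defined in these snippets; a minimal sketch of the usual fixed-size chunking helper (an assumption, not necessarily the repo's implementation):

def chunker(seq, size):
    # Yield successive size-element chunks of a flat list
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))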
Example #4
    print("********************************************************")
    print("START : ")
    

    print([cube])
    print("********************************************************")
    time.sleep(3)

    while run:

        if perc_solved_cube(cube) == 1:
            run = False
            print("Solved")
            break
        # Flatten the cube into the network's input shape (1 x 324)
        flat_cube = np.array(flatten_1d_b(cube)).reshape([1, 324])

        Qs = sess.run(myAgent.output, feed_dict={myAgent.state_in: flat_cube})
        # print("Ideal : "+str(my_formula.reverse()))
        print("Q values:")
        print(Qs)

        # Take the biggest Q value (= the best action)
        choice = np.argmax(Qs)
        
        # lookup     = ["R", "L","D","U","B","F","R'", "L'","D'","U'","B'","F'"] #We are not accounting for half turns
        acti_map = {'F': 0, 'B': 1, 'U': 2, 'D': 3, 'L': 4, 'R': 5, "F'": 6, "B'": 7, "U'": 8, "D'": 9, "L'": 10, "R'": 11}

        action = ""
        for act, val in acti_map.items():
            if val == choice:
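The reverse lookup above can be done without a scan by inverting the dict once; an equivalent sketch:

inv_acti_map = {v: k for k, v in acti_map.items()}
action = inv_acti_map[int(choice)]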
Example #5
def solve(cube):

    file_path = "auto.h5"

    model = get_model()

    model.load_weights(file_path)

    cube.score = 0

    list_sequences = [[cube]]

    existing_cubes = set()

    action_list = []

    success = False

    # Beam search: expand, score, and keep the best sequences each step
    for j in range(50):

        X = [flatten_1d_b(x[-1]) for x in list_sequences]

        value, policy = model.predict(np.array(X), batch_size=1024)

        new_list_sequences = []

        for x, pol in zip(list_sequences, policy):  # pol: policy row for this sequence

            new_sequences = [x + [x[-1].copy()(action)] for action in action_map]  # (unused)

            pred = np.argsort(pol)

            take_action = list(action_map.keys())[pred[-1]]

            # NOTE: one action is appended per beam entry per step, so
            # action_list is not a single coherent solution path
            action_list.append(take_action)

            cube_1 = x[-1].copy()(take_action)

            new_list_sequences.append(x + [cube_1])


        # print("new_list_sequences", len(new_list_sequences))
        last_states_flat = [flatten_1d_b(x[-1]) for x in new_list_sequences]
        value, _ = model.predict(np.array(last_states_flat), batch_size=1024)
        value = value.ravel().tolist()

        for x, v in zip(new_list_sequences, value):
            # Penalise already-visited states so the beam explores new cubes
            x[-1].score = v if str(x[-1]) not in existing_cubes else -1

        # Keep only the best-scoring sequences (beam width 100)
        new_list_sequences.sort(key=lambda x: x[-1].score, reverse=True)

        new_list_sequences = new_list_sequences[:100]

        existing_cubes.update(set([str(x[-1]) for x in new_list_sequences]))

        list_sequences = new_list_sequences

        list_sequences.sort(key=lambda x: perc_solved_cube(x[-1]), reverse=True)

        perc = perc_solved_cube(list_sequences[0][-1])

        if perc == 1:
            success = True
            break

    return success, action_list
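An illustrative call of solve, assuming a trained auto.h5 checkpoint and the helpers above are available (the scramble here is for demonstration only):

import pycuber as pc

scrambled = pc.Cube()
scrambled(pc.Formula().random())   # scramble the cube
solved, actions = solve(scrambled)
print(solved, len(actions))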
Example #6
    model = get_model()

    model.load_weights(file_path)

    sample_X, sample_Y, cubes = gen_sample(10)
    cube = cubes[0]
    cube.score = 0

    list_sequences = [[cube]]

    existing_cubes = set()

    for j in range(1000):

        X = [flatten_1d_b(x[-1]) for x in list_sequences]

        value, policy = model.predict(np.array(X), batch_size=1024)

        new_list_sequences = []

        for x, pol in zip(list_sequences, policy):  # pol: policy row for this sequence

            new_sequences = [
                x + [x[-1].copy()(action)] for action in action_map
            ]

            pred = np.argsort(pol)

            # Expand the two actions the policy head scores highest
            cube_1 = x[-1].copy()(list(action_map.keys())[pred[-1]])
            cube_2 = x[-1].copy()(list(action_map.keys())[pred[-2]])
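np.argsort returns indices in ascending order, so pred[-1] and pred[-2] are the two highest-scoring actions. An equivalent top-2 selection in one step:

top2 = np.argsort(pol)[-2:][::-1]   # indices of the two best actions, best first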
Example #7
            #print(list_sequences)

            preview_cube = cube

            #show cube before solving
            #print([preview_cube])

            #print("start solve......")

            #start solving with X steps
            for j in range(50):

                #print("step: {}".format(j + 1))

                X = [flatten_1d_b(x[-1]) for x in list_sequences]

                value, policy = model.predict(np.array(X), batch_size=1024)

                new_list_sequences = []

                for x, pol in zip(list_sequences, policy):  # avoid shadowing the policy array

                    new_sequences = [
                        x + [x[-1].copy()(action)] for action in action_map
                    ]

                    pred = np.argsort(pol)

                    take_action = list(action_map.keys())[pred[-1]]
Example #8
import pycuber as pc
import utils
#https://github.com/adrianliaw/PyCuber/blob/version2/examples/sample_program.py
# Create a cube object (starts solved)
mycube = pc.Cube()

st = utils.flatten_1d_b(mycube)

print(st)

# k = ["R", "S"]
# # A Formula represents a sequence of moves
# my_formula = pc.Formula(k[0])

# mycube(my_formula)
# print(mycube)
# # Reversing the formula undoes the moves
# my_formula.reverse()

# A Formula object to hold the scramble
alg = pc.Formula()
# Generate a random scramble sequence
random_alg = alg.random()

# random_alg = random_alg[:10]

# print(random_alg)

mycube(random_alg)
print(utils.perc_solved_cube(mycube))
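The commented lines above hint at undoing a scramble: Formula.reverse() inverts the move sequence in place, so applying it again restores the solved state:

random_alg.reverse()
mycube(random_alg)
print(utils.perc_solved_cube(mycube))  # should now report a fully solved cube (1.0)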