Ejemplos de Expectimax.run_expectimax en Python

Lenguaje de programación: Python

Namespace/Package Name: expectimax

Clase / Tipo: Expectimax

Método / Función: run_expectimax

Ejemplos en hotexamples.com: 2

Python Expectimax.run_expectimax: 2 ejemplos encontrados. Estos son los ejemplos reales en Python mejor valorados de expectimax.Expectimax.run_expectimax, extraídos de proyectos de código abierto. Puedes valorar los ejemplos para ayudarnos a mejorar su calidad.

Métodos usados con frecuencia

Mostrar Ocultar

Expectimax(6)

run_expectimax(2)

retrievemove(1)

Ejemplo n.º 1

Mostrar archivo

Archivo: game_controller.py Proyecto: hakon0601/AIProg

class GameController():
    """Console front end that trains a move classifier and lets it play 2048.

    Constructing an instance prints the command list, configures the
    network, asks the user for a command and then plays games. The largest
    tile of every finished game is collected in ``self.results``; when a
    batch is complete the results are reported and the user is asked for the
    next command.
    """

    NR_OF_TRAINING_CASES = 7000
    NR_OF_TEST_CASES = 833
    NR_OF_OUTPUT_NODES = 4
    # Number of games played in one "p", "r" or "c" batch.
    NR_OF_GAMES_PER_RUN = 50
    # Seconds to wait for the remote comparison service before giving up.
    WELCH_TIMEOUT_SECONDS = 10

    def __init__(self,
                 collect_cases=False,
                 use_merge_input_nodes=False,
                 depth=3):
        """Set up state, configure the network and enter the play loop.

        Args:
            collect_cases: when true, every visited board is labelled with
                the Expectimax result and stored as a training case.
            use_merge_input_nodes: when true, boards go through
                ``preprocessing_row_column`` and the network gets 24 input
                nodes instead of 16.
            depth: search depth handed to ``State`` and ``run_expectimax``.
        """
        self.collect_cases = collect_cases
        self.use_merge_input_nodes = use_merge_input_nodes
        self.depth = depth
        if collect_cases:
            self.neural_network_cases = load_cases()
            self.expectimax = Expectimax()
        self.results_from_nn_playing = []
        self.results = []
        # Placeholder baseline used by the comparison until an "r" batch
        # replaces it with real random-player results.
        self.results_from_random_playing = (
            [112] * GameController.NR_OF_GAMES_PER_RUN)
        self.start_time = time()
        self.print_commands()
        self.setup_network()
        self.user_control()
        self.start_game()

    def start_game(self):
        """Play batches of games until the process is interrupted.

        While the current batch is incomplete a fresh board is created and
        one game is played. When the batch is complete the results are
        printed, stored according to the active command, and the user is
        asked for the next command. Implemented as a loop (not by calling
        itself) so long sessions cannot exhaust the recursion limit.
        """
        while True:
            if len(self.results) < self.results_length:
                print("run nr", len(self.results))
                self.game_board = Game2048(
                    board=[[0] * 4 for _ in range(4)])
                self.board = self.game_board.board
                self.game_board.generate_new_node()
                self.move_count = 0
                self.time = time()
                self.run_algorithm()
                continue

            print(self.results)
            print("Largest tile", max(self.results))
            print("Average tile",
                  sum(self.results) / float(len(self.results)))
            mode = self.action[0]
            if mode == "p":
                self.results_from_nn_playing = copy.copy(self.results)
            elif mode == "r":
                self.results_from_random_playing = copy.copy(self.results)
            elif mode == "c":
                self.results_from_nn_playing = copy.copy(self.results)
                self.print_comparison()
            self.results = []
            self.user_control()

    @staticmethod
    def _read_int_list(prompt):
        """Prompt for a comma-separated list of integers and return it."""
        return [
            int(part) for part in input(prompt).replace(" ", "").split(",")
        ]

    def setup_network(self):
        """Ask for the network configuration and build the move classifier.

        Answering anything starting with "y" selects the built-in defaults;
        any other answer (including an empty one) prompts for each setting.
        Training and test boards are then preprocessed, and ``self.errors``
        is reset.
        """
        number_of_input_nodes = 24 if self.use_merge_input_nodes else 16
        use_default = input("Use default [y/n]: ")
        # startswith() instead of [0] so an empty answer cannot raise.
        if use_default.strip().lower().startswith("y"):
            nodes_in_each_layer = [700]
            activation_functions = [3, 4]
            learning_rate = 0.02
            bulk_size = 1
        else:
            nodes_in_each_layer = self._read_int_list(
                "Hidden nodes in each layer: ")
            print("TanH: 1, Sigmoid: 2, Rectify: 3, Softmax: 4")
            activation_functions = self._read_int_list(
                "Select activation functions: ")
            learning_rate = float(input("learning rate: "))
            bulk_size = int(input("Bulk size: "))

        self.move_classifier = MoveClassifier(
            nr_of_training_cases=GameController.NR_OF_TRAINING_CASES,
            nr_of_test_cases=GameController.NR_OF_TEST_CASES,
            nr_of_nodes_in_layers=nodes_in_each_layer,
            act_functions=activation_functions,
            lr=learning_rate,
            number_of_input_nodes=number_of_input_nodes,
            number_of_output_nodes=GameController.NR_OF_OUTPUT_NODES,
            bulk_size=bulk_size)

        # Return values are ignored, so preprocessing presumably works in
        # place on the lists it is given -- TODO confirm in MoveClassifier.
        self.move_classifier.preprocessing(boards=self.move_classifier.boards,
                                           labels=self.move_classifier.labels)
        self.move_classifier.preprocessing(
            boards=self.move_classifier.test_boards,
            labels=self.move_classifier.test_labels)

        if self.use_merge_input_nodes:
            self.move_classifier.boards = self.move_classifier.preprocessing_row_column(
                boards=self.move_classifier.boards)
            self.move_classifier.test_boards = self.move_classifier.preprocessing_row_column(
                boards=self.move_classifier.test_boards)

        self.errors = []

    def user_control(self):
        """Read commands until one that starts a batch of games is entered.

        Commands are identified by their first character; a number trains
        the network for that many epochs. Returns after setting
        ``self.results_length`` for "s", "p", "r" and (when no network
        results exist yet) "c". Empty or unrecognised input is reported and
        asked for again instead of raising.
        """
        games_per_run = GameController.NR_OF_GAMES_PER_RUN
        while True:
            self.action = input(
                "Enter a command or a number to train: ").strip()
            if not self.action:
                continue
            command = self.action[0]
            # Test classification percentage using both the test set and training set
            if command == "t":
                self.test_percentage_training_and_test_set()
            # Play forever
            elif command == "s":
                self.results_length = float('inf')
                return
            # Play a batch of games, using the neural net (p) or a random player (r)
            elif command in ("p", "r"):
                self.results_length = games_per_run
                return
            # Run the grading function
            elif command == "c":
                if len(self.results_from_nn_playing) < games_per_run:
                    self.results_length = games_per_run
                    return
                self.print_comparison()
            # Toggle collection of training cases
            elif command == "l":
                self.collect_cases = not self.collect_cases
                if self.collect_cases:
                    self.neural_network_cases = load_cases()
                    self.expectimax = Expectimax()
            # Anything else must be a number of training epochs
            else:
                try:
                    epochs = int(self.action)
                except ValueError:
                    print("Unknown command:", self.action)
                    continue
                self.errors = self.move_classifier.do_training(
                    epochs=epochs, errors=self.errors)
                self.test_percentage_training_and_test_set()

            print("Total time elapsed: " +
                  str(round((time() - self.start_time) / 60, 1)) + " min")

    def run_algorithm(self):
        """Play the current game to its end and record the result.

        Moves are made one after another until the board reports game over,
        at which point ``conclude_game`` stores the largest tile and clears
        ``self.continuing``. Previously only a single move was played per
        call and the ``continuing`` flag was never consulted, so a game
        could not finish.
        """
        self.continuing = True
        while self.continuing:
            if self.game_board.is_game_over():
                self.conclude_game()
                return
            self._play_one_move()

    def _play_one_move(self):
        """Choose one move (random or network), apply it and spawn a tile."""
        current_node = State(self.game_board, self.depth)
        self.move_count += 1
        rows = current_node.board.board
        # Flatten with row 3 first and row 0 last.
        flat_board = rows[3] + rows[2] + rows[1] + rows[0]
        if self.collect_cases:
            self.gather_case_and_result_using_expectimax(
                current_node, flat_board)
        if self.action[0] == "r":
            chosen_move = self.choose_legal_random_move()
        else:
            # The classifier works on a list of boards, so wrap the single one.
            flat_board = [flat_board]
            self.move_classifier.preprocessing(boards=flat_board, labels=None)
            if self.use_merge_input_nodes:
                flat_board = self.move_classifier.preprocessing_row_column(
                    boards=flat_board)
            output_activations = self.move_classifier.predictor(flat_board)
            chosen_move = self.choose_legal_move_from_nn(output_activations)

        self.do_move(chosen_move)
        self.game_board.generate_new_node()

    def do_move(self, chosen_move):
        """Apply a move code: 0 left, 1 right, 2 up, 3 down; others are ignored."""
        if chosen_move == 0:
            self.game_board.move_left()
        elif chosen_move == 1:
            self.game_board.move_right()
        elif chosen_move == 2:
            self.game_board.move_up()
        elif chosen_move == 3:
            self.game_board.move_down()

    def conclude_game(self):
        """Record the finished game's largest tile and print running statistics.

        Also persists the collected training cases when case collection is on.
        """
        self.continuing = False
        largest_tile = self.game_board.get_largest_tile()
        print("Largest tile", largest_tile)
        self.results.append(largest_tile)
        print("Average tile", sum(self.results) / float(len(self.results)))
        if self.collect_cases:
            print("size of training data", len(self.neural_network_cases))
            dump_cases(self.neural_network_cases)

    def choose_legal_random_move(self):
        """Return a uniformly drawn move code (0-3) that is legal on the board."""
        while True:
            candidate = randint(0, 3)
            if self.game_board.is_move_legal(candidate):
                return candidate

    def choose_legal_move_from_nn(self, result):
        """Return the legal move with the highest activation.

        Args:
            result: sequence whose first element holds one activation per
                move code. It is modified in place: every illegal move that
                was tried has its activation replaced by negative infinity.

        Negative infinity (rather than -1) guarantees a rejected move can
        never tie with, or beat, a remaining candidate.
        """
        activations = result[0]
        while True:
            chosen_move = np.argmax(activations)
            if self.game_board.is_move_legal(chosen_move):
                return chosen_move
            activations[chosen_move] = float("-inf")

    def gather_case_and_result_using_expectimax(self, current_node,
                                                flat_board):
        """Run Expectimax on the node and store its result keyed by the board."""
        self.expectimax.run_expectimax(current_node, self.depth, -float("inf"),
                                       float("inf"), None)
        self.neural_network_cases[str(flat_board)] = self.expectimax.result

    def welch(self, list1, list2):
        """Post both result lists to the remote grading service.

        Returns the service's text reply, or a short "unavailable" message
        when the request fails or times out, so a network problem does not
        abort the interactive session.
        """
        params = {"results": str(list1) + " " + str(list2), "raw": "1"}
        try:
            resp = requests.post('http://folk.ntnu.no/valerijf/6/',
                                 data=params,
                                 timeout=GameController.WELCH_TIMEOUT_SECONDS)
        except requests.RequestException as err:
            return "unavailable ({})".format(err)
        return resp.text

    def test_percentage_training_and_test_set(self):
        """Print the classifier's score on the test set and the training set."""
        output_activations = self.move_classifier.do_testing(
            boards=self.move_classifier.test_boards)
        print(
            "Statistics (test set): \t\t",
            self.move_classifier.check_result(
                output_activations, labels=self.move_classifier.test_labels),
            "%")
        output_activations = self.move_classifier.do_testing(
            boards=self.move_classifier.boards)
        print(
            "Statistics (training set):\t ",
            self.move_classifier.check_result(
                output_activations, labels=self.move_classifier.labels), "%")

    def print_comparison(self):
        """Print network vs. random results and the remote grading reply."""
        nn_results = self.results_from_nn_playing
        random_results = self.results_from_random_playing
        print("NN results:\t", nn_results)
        print("Random results:\t", random_results)
        print("largest tiles", max(nn_results), max(random_results))
        print(
            "average tiles",
            sum(nn_results) / float(len(nn_results)),
            sum(random_results) / float(len(random_results)))
        points = self.welch(random_results, nn_results)
        print("points", points)

    def print_commands(self):
        """Print the list of commands understood by ``user_control``."""
        print("Commands")
        print(
            "t: Test the network classification using both the test set and the training set"
        )
        print("l toggle case collection. Currently: ", self.collect_cases)
        print("s: Run infinite times using NN")
        print("p: Run 50 games using NN")
        print("r: Run 50 games using a random player")
        print("c: Compare the two runs of 50")

Ejemplo n.º 2

Mostrar archivo

class Gui(tk.Tk):
    """Tk window in which Expectimax plays a series of 2048 games.

    The constructor builds the canvas and immediately starts the first game;
    each move is followed by a redraw and the next move is scheduled with
    ``after``. The largest tile of every finished game is collected in
    ``self.results``.

    Print calls pass one pre-formatted string so the output is the same
    whether ``print`` is a statement or a function.
    """

    # Number of games played before the series is reported and stopped.
    NR_OF_GAMES = 30
    # Search depth handed to State and run_expectimax.
    SEARCH_DEPTH = 3
    # File that manual key-press moves are appended to by move().
    TRAINING_DATA_PATH = '/Users/hakon0601/Dropbox/Python/AIProg/AIProg_Module_5/2048trainingdata.txt'
    # Fill colour for tile values missing from the colour table.
    DEFAULT_TILE_COLOR = "dim grey"

    def __init__(self, delay, diagonal=False, *args, **kwargs):
        """Create the window and start playing.

        Args:
            delay: milliseconds passed to ``after`` between two moves.
            diagonal: accepted but not used by this class.
            *args, **kwargs: forwarded to ``tk.Tk.__init__``.
        """
        tk.Tk.__init__(self, *args, **kwargs)
        self.title("2048-solver")
        self.cell_width = self.cell_height = 100
        self.dim = (4, 4)
        self.delay = delay
        screen_width = self.dim[0] * self.cell_width + 1
        screen_height = self.dim[1] * self.cell_height + 1
        self.canvas = tk.Canvas(self, width=screen_width, height=screen_height, borderwidth=0, highlightthickness=0)
        self.canvas.pack(side="top", fill="both", expand="true")
        self.old_nr_of_cases = 0

        self.color_dict = self.fill_color_dict()
        self.results = []
        self.start_game()

    def start_game(self):
        """Print the running average and start the next game, if any remain.

        After ``NR_OF_GAMES`` finished games the result list and the average
        are printed and no further game is started.
        """
        # Guard the empty case explicitly instead of skewing the divisor.
        if self.results:
            average = sum(self.results) / float(len(self.results))
        else:
            average = 0.0
        print("avg {}".format(average))
        if len(self.results) < self.NR_OF_GAMES:
            self.game_board = Game2048(board=[[0] * 4 for _ in range(4)])
            self.board = self.game_board.board
            self.game_board.generate_new_node()
            self.depth = self.SEARCH_DEPTH
            self.move_count = 0
            self.expectimax = Expectimax()
            self.draw_board()
            self.time = time()
            self.run_algorithm()
        else:
            print(self.results)
            print("avg {}".format(average))

    def run_algorithm(self):
        """Play one move, redraw, and schedule the next move.

        When the game is over the result is recorded and ``start_game`` is
        called. ``run_expectimax`` is expected to return "left", "right",
        "up" or "down"; ``None``, ``0`` or any other value stops the
        scheduling of further moves.
        """
        if self.game_board.is_game_over():
            largest_tile = self.game_board.get_largest_tile()
            print("largest tile {}".format(largest_tile))
            print("time elapsed: " + str(round((time() - self.time) / 60, 1)) + " min")
            self.results.append(largest_tile)
            print("move count {}".format(self.move_count))
            return self.start_game()

        current_node = State(self.game_board, self.depth)
        self.move_count += 1
        chosen_move = self.expectimax.run_expectimax(current_node, self.depth, -float("inf"), float("inf"), None)

        continuing = True
        if chosen_move is None or chosen_move == 0:
            # The original assigned to a misspelled `Continuing`, so this
            # stop request never took effect.
            continuing = False
        elif chosen_move == "left":
            self.game_board.move_left()
        elif chosen_move == "right":
            self.game_board.move_right()
        elif chosen_move == "up":
            self.game_board.move_up()
        elif chosen_move == "down":
            self.game_board.move_down()
        else:
            print("finished because of error in minimax chosen move")
            continuing = False
        self.game_board.generate_new_node()
        self.draw_board()
        if continuing:
            self.after(self.delay, self.run_algorithm)

    def bind_keys(self):
        """Bind the arrow keys to manual moves (up 0, right 1, down 2, left 3)."""
        self.bind('<Up>', lambda event: self.move(event, self.game_board.move_up(), 0))
        self.bind('<Right>', lambda event: self.move(event, self.game_board.move_right(), 1))
        self.bind('<Down>', lambda event: self.move(event, self.game_board.move_down(), 2))
        self.bind('<Left>', lambda event: self.move(event, self.game_board.move_left(), 3))

    @staticmethod
    def _format_training_line(board, direction):
        """Return one training-data line for a 4x4 board and a direction code.

        Cells are listed from row 3 down to row 0, separated by single
        spaces, followed by two spaces, the direction and a newline.
        """
        cells = []
        for row_index in range(3, -1, -1):
            cells.extend(str(board[row_index][column]) for column in range(4))
        return " ".join(cells) + "  " + str(direction) + "\n"

    def move(self, event, is_moved, direction):
        """After a successful manual move: spawn a tile, redraw, log the case.

        Args:
            event: the Tk key event (unused).
            is_moved: result of the board's move call; nothing happens when
                it is falsy.
            direction: direction code written to the training-data file.
        """
        if is_moved:
            self.game_board.generate_new_node()
            self.draw_board()
            line = self._format_training_line(self.game_board.board, direction)
            # Context manager closes the file even if the write fails.
            with open(self.TRAINING_DATA_PATH, 'a') as training_file:
                training_file.write(line)

    def draw_board(self):
        """Redraw every cell; row 0 is drawn at the bottom of the canvas."""
        self.canvas.delete("all")
        canvas_height = self.dim[1] * self.cell_height
        for y in range(self.dim[1]):
            for x in range(self.dim[0]):
                x1 = x * self.cell_width
                y1 = canvas_height - y * self.cell_height
                x2 = x1 + self.cell_width
                y2 = y1 - self.cell_height
                cell_value = self.board[y][x]
                # Fall back to a default colour for values not in the table.
                color = self.color_dict.get(str(cell_value), self.DEFAULT_TILE_COLOR)
                self.canvas.create_rectangle(x1, y1, x2, y2, fill=color, tags="rect")
                if cell_value != 0:
                    self.canvas.create_text(x1 + self.cell_width // 2, y1 - self.cell_height // 2, text=str(cell_value))

    def fill_color_dict(self):
        """Return the mapping from tile value (as a string) to Tk colour name."""
        return {
            '0': "white",
            '2': "lemon chiffon",
            '4': "peach puff",
            '8': "sandy brown",
            '16': "dark orange",
            '32': "salmon",
            '64': "tomato",
            '128': "khaki",
            '256': "khaki",
            '512': "red",
            '1024': "light goldenrod",
            '2048': "firebrick",
            '4096': "dim grey",
            '8192': "light goldenrod",
        }