class MainApp(App):
    """Kivy application with a start menu and a game screen.

    ``build`` creates both screens up front and shows the start menu.
    Pressing "Start game" on the menu (or "Restart" on the game screen)
    creates a fresh ``Game`` and shows the game screen; the "Start game"
    button on the game screen then plays the game automatically, one
    move per clock tick.
    """

    # Handle of the Clock event that drives automatic moves; None while no
    # move timer is scheduled.
    _move_event = None

    def build(self):
        """Create both screens and return the root layout showing the menu."""
        #sound = SoundLoader.load('muzlome_Metallica_-_Sad_But_True_47954412.mp3')
        #sound.play()
        self.create_start_screen()
        self.create_game_screen()
        self.main_layout = FloatLayout()
        self.main_layout.add_widget(self.start_screen)
        return self.main_layout

    def create_start_screen(self):
        """Build the start menu and store it in ``self.start_screen``.

        The menu consists of a stretched background image and a vertical
        box holding the "Start game" and "Settings" buttons.
        """
        self.start_screen = FloatLayout(size_hint=(1, 1),
                                        pos_hint={
                                            'center_x': 0.5,
                                            'center_y': 0.5
                                        })
        background_image = Image(source=BACKGROUND_IMAGE,
                                 size_hint=[1, 1],
                                 pos_hint={
                                     'center_x': 0.5,
                                     'center_y': 0.5
                                 },
                                 allow_stretch=True,
                                 keep_ratio=False)
        # NOTE(review): this image is created but never added to any layout,
        # so it is not displayed. Confirm whether it should be attached to
        # the start screen or removed.
        background_belki = Image(source=BELKI,
                                 size_hint=[1, 1],
                                 pos_hint={
                                     'center_x': 0.5,
                                     'center_y': 0.5
                                 },
                                 allow_stretch=True,
                                 keep_ratio=False)
        buttons = BoxLayout(orientation='vertical',
                            spacing=10,
                            size_hint=(0.3, 0.4),
                            pos_hint={
                                'center_x': 0.7,
                                'center_y': 0.4
                            })
        start_game_button = Button(text='Start game',
                                   on_press=self.reset_game_mode,
                                   background_normal='',
                                   background_color=BUTTON_COLOR,
                                   color=[0, 0, 0, 1])
        # The settings button has no handler yet.
        settings_button = Button(
            text='Settings',
            #on_press=None,
            background_normal='',
            background_color=BUTTON_COLOR,
            color=[0, 0, 0, 1])
        buttons.add_widget(start_game_button)
        buttons.add_widget(settings_button)
        # The background is added first and the buttons after it.
        self.start_screen.add_widget(background_image)
        self.start_screen.add_widget(buttons)

    def create_game_screen(self):
        """Build the game screen and store it in ``self.game_screen``.

        Creates ``self.board`` plus the Quit, Restart, Main menu and
        Start game buttons. The buttons use ``center_x`` = 1.30, i.e. a
        position beyond the right edge of ``game_screen`` (which itself
        takes half of the parent's width).
        """
        self.board = BoardWidget(FIELD_WIDTH, FIELD_HEIGHT)
        exit_button = Button(text='Quit',
                             on_press=self.leave,
                             size_hint=[0.25, 0.1],
                             pos_hint={
                                 'center_x': 1.30,
                                 'center_y': 0.66
                             },
                             background_color=BUTTON_COLOR,
                             background_normal='')
        restart_button = Button(text='Restart',
                                on_press=self.reset_game_mode,
                                size_hint=[0.25, 0.1],
                                pos_hint={
                                    'center_x': 1.30,
                                    'center_y': 0.90
                                },
                                background_color=BUTTON_COLOR,
                                background_normal='')
        # The main-menu button has no handler yet.
        back_to_menu_button = Button(
            text='Main menu',
            #on_press=self.back_to_main_menu,
            size_hint=[0.25, 0.1],
            pos_hint={
                'center_x': 1.30,
                'center_y': 0.78
            },
            background_color=BUTTON_COLOR,
            background_normal='')
        make_move_button = Button(text='Start game',
                                  on_press=self.start_game,
                                  size_hint=[0.25, 0.1],
                                  pos_hint={
                                      'center_x': 1.30,
                                      'center_y': 0.4
                                  },
                                  background_color=BUTTON_COLOR,
                                  background_normal='')
        self.game_screen = FloatLayout(size_hint=[0.5, 1],
                                       pos_hint={
                                           'center_x': 0.5,
                                           'center_y': 0.5
                                       })
        self.game_screen.add_widget(self.board)
        self.game_screen.add_widget(exit_button)
        self.game_screen.add_widget(restart_button)
        self.game_screen.add_widget(back_to_menu_button)
        self.game_screen.add_widget(make_move_button)

    def reset_game_mode(self, _):
        """Create a fresh game and show the game screen.

        Bound to both the menu's "Start game" button and the game
        screen's "Restart" button; the button argument is ignored.
        """
        # Stop a move timer left over from the previous game so it does not
        # silently keep playing the new one.
        self._cancel_move_timer()
        self.game = Game([(FirstStupidAlgorithm(), [2, 2])])
        # Swap whatever screen is currently shown for the game screen.
        self.main_layout.remove_widget(self.main_layout.children[0])
        self.main_layout.add_widget(self.game_screen)

    def start_game(self, _):
        """Start playing the current game automatically, one move per 0.1 s.

        Pressing the button again restarts the timer instead of stacking
        a second one on top of it.
        """
        # FIXME not class method
        self._cancel_move_timer()
        self._move_event = Clock.schedule_interval(self.make_move, 0.1)

    def make_move(self, _):
        """Advance the game by one move and redraw the board.

        Returns:
            True while the game is still running, False once it is over.
            Used as a Clock callback, so returning False ends the
            scheduled interval.
        """
        self.game.move()
        self.board.refresh(self.game.board)
        keep_running = not self.game.is_game_over()
        if not keep_running:
            # The Clock drops the event when False is returned; forget the
            # stale handle as well.
            self._move_event = None
        return keep_running

    def set_start_mode(self, _):
        """Placeholder; currently does nothing."""
        pass

    def leave(self, button):
        """Quit the application (handler of the "Quit" button)."""
        # Use Kivy's own shutdown instead of the interactive-only `exit()`
        # helper, which is not guaranteed to be defined.
        self.stop()

    def _cancel_move_timer(self):
        """Cancel the scheduled move timer, if there is one."""
        if self._move_event is not None:
            self._move_event.cancel()
            self._move_event = None
def training_loop(training_model, opponent_model, verbose=False):
    """Train ``training_model`` with Q-learning against ``opponent_model``.

    Runs a fixed number of episodes. In each episode the training model is
    randomly assigned player 0 or 1, plays one full game against the
    opponent and is fitted after every one of its own moves towards the
    target ``reward + discount * max(Q(next_board))`` (or just ``reward``
    on a terminal move). Exploration is epsilon-greedy with an epsilon that
    decays every episode. Per-episode statistics are sent to a
    ``ModifiedTensorBoard`` writing under ``logs/fit/``.

    Args:
        training_model: Model being trained. Must provide ``_name``,
            ``predict(board, player)``, ``move(board, player)`` and
            ``fit_one(player, board, targets, ...)``.
        opponent_model: Opponent; must provide ``move(board, player)``.
        verbose: When true, print per-move diagnostics, pass ``verbose``
            on to ``Game`` and play a demonstration game via ``run_game``
            after the final episode (and at every ``interval_size``-th
            episode).

    Returns:
        None.
    """
    # for tensor board logging
    log_dir = ("logs/fit/" + training_model._name +
               datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
    tensorboard = ModifiedTensorBoard(log_dir=log_dir)

    # now execute the q learning
    discount = 0.9
    eps = 0.5
    interval_size = 500
    num_episodes = interval_size * 1
    # ensures that eps = 0.001 after `num_episodes` episodes
    decay_factor = (1000 * eps) ** (-1 / num_episodes)

    r_avg_list = []
    wins = []
    n_moves_list = []
    moves_played = [0] * BOARD_SIZE

    for i in tqdm(range(num_episodes), desc="Training"):
        as_player = random.choice([0, 1])
        eps *= decay_factor
        g = Game(verbose=verbose)

        if as_player == 1:
            # Training as player 1 so opponent makes first move
            winner, board = g.move(opponent_model.move(g._board, 0))
        else:
            board = g._board

        done = False
        r_sum = 0
        move_num = 0
        while not done:
            random_move = False
            move_num += 1
            preds = training_model.predict(board, as_player)

            # To encourage early exploration
            if np.random.random() < eps:
                # numpy's randint excludes the upper bound, so passing
                # BOARD_SIZE yields indices 0 .. BOARD_SIZE - 1.
                move = np.random.randint(0, BOARD_SIZE)
                random_move = True
            else:
                move = training_model.move(board, as_player)
            # NOTE(review): recorded for random and model moves alike; the
            # flattened source does not show the original nesting - confirm.
            moves_played.append(move)

            winner, new_board = g.move(move)
            if winner is None:
                # Game still open: let the opponent answer.
                opponent_move = opponent_model.move(new_board, 1 - as_player)
                winner, new_board = g.move(opponent_move)

            # Calculate reward amount
            if winner == as_player:
                done = True
                wins.append(1)
                r = 1000 - move_num**2
            elif winner == 1 - as_player:
                done = True
                wins.append(0)
                r = -(1000 - move_num**2)
            elif winner == -1:
                # presumably -1 marks a game without a winner - TODO confirm
                done = True
                wins.append(None)
                r = 1000
            else:
                r = move_num

            if winner is None:
                target = r + discount * np.max(
                    training_model.predict(new_board, as_player))
            else:
                target = r

            # Only the entry of the move actually played is changed; all
            # other entries keep the model's own prediction.
            target_vec = deepcopy(preds[0])
            target_vec[move] = target
            training_model.fit_one(
                as_player,
                board,
                np.array([target_vec]),
                epochs=1,
                verbose=0,
                callbacks=[tensorboard],
            )

            if verbose:
                # These diagnostics are only printed, so the extra
                # prediction is only paid for in verbose mode.
                new_preds = training_model.predict(board, as_player)
                sse = sum(
                    (p - t)**2 for p, t in zip(preds[0], target_vec))
                new_sse = sum(
                    (p - t)**2 for p, t in zip(new_preds[0], target_vec))
                # Average over the games actually recorded (at most 20).
                recent = r_avg_list[-20:]
                recent_avg = sum(recent) / len(recent) if recent else 0.0
                print(
                    f" {training_model._name} training as player: {as_player},"
                    f" move: {move_num},"
                    f" eps: {round(eps, 2)},"
                    f" old preds: {[round(p, 2) for p in preds[0]]},"
                    f" rand move: {random_move},"
                    f" tgt preds: {[round(p, 2) for p in target_vec]},"
                    f" reward: {r},"
                    f" new preds: {[round(p, 2) for p in new_preds[0]]},"
                    f" average last 20 games: {round(recent_avg, 2)}"
                    f" sse: {round(sse, 4)} >> {round(new_sse, 4)} ")

            board = new_board
            r_sum += r

        if verbose and ((i % interval_size == 0 and i > 0) or
                        (i == num_episodes - 1)):
            run_game(training_model, opponent_model, verbose=True)

        # Collect game level metrics
        r_avg_list.append(round(r_sum, 2))
        n_moves_list.append(move_num)
        tensorboard.update_stats(reward_sum=r_sum,
                                 wins=wins[-1],
                                 n_moves_avg=n_moves_list[-1])
        # NOTE(review): kept per episode next to update_stats; the flattened
        # source does not show whether this ran once after the loop instead.
        tensorboard.update_dist(moves_played=moves_played)
def main():
    """Play one game to completion on the console.

    Prints the board once before the first move and again after every
    move until the game reports that it is over.
    """
    game = Game([(FirstStupidAlgorithm(), [6, 8])])
    print(game.board)
    # Test truthiness directly instead of comparing against False.
    while not game.is_game_over():
        game.move()
        print(game.board)