Exemple #1
0
    def __init__(self):
        """Set up the game, the betting agent, the card images, and the first frame.

        Depending on the module-level LOAD_AI and FAST_LEARN flags, the agent
        first loads its saved state from AI_FILE and/or runs LEARN_ITERATIONS
        rounds of Q-learning before the window is created.
        """
        self.game = Game()
        self.agent = Agent(SMART_BET)
        self.splitted_games = []

        # Optional warm-up steps, driven by configuration flags.
        if LOAD_AI:
            print("Loading AI knowledge..")
            self.agent.load(AI_FILE)
        if FAST_LEARN:
            print(f"Learning {LEARN_ITERATIONS} times..")
            self.agent.Q_run(LEARN_ITERATIONS, True)

        self.autoQL = False
        self.autoPlay = False
        self.action = HIT
        self.split_hand = 0

        # One image per (rank, suit) pair, keyed by that pair.
        self.card_imgs = {
            (rank, suit): pygame.image.load(
                os.path.join('resources/cards/', f"{rank}_{suit}.png"))
            for rank in RANKS
            for suit in SUITS
        }
        self.card_back_img = pygame.image.load('resources/cardback.png')

        self.init_display()
        self.render_board()
Exemple #2
0
def test_divergence(algorithm):
    """Train an agent from the "test_convergence" snapshot and compare it with ``base``.

    ``algorithm`` selects one of ALG_MC, ALG_TD or ALG_QL; passing 0 runs all
    three, in that order. Each selected algorithm learns for 1000 episodes and
    is then compared against ``base`` with a tolerance of 0.15.
    """
    snapshot = "test_convergence"
    ai = Agent()
    base.load(snapshot)
    ai.load(snapshot)

    episodes = int(1e3)
    tolerance = 0.15
    # (algorithm id, number of differing values still accepted)
    allowed_diffs = (
        (ALG_MC, 5),
        (ALG_TD, 10),
        (ALG_QL, 5),
    )

    for alg, limit in allowed_diffs:
        if algorithm in (0, alg):
            ai_learn(ai, alg, episodes, print_tester=True)
            ai_compare(base, ai, alg, tolerance, limit)

    print()
Exemple #3
0
def test_three_steps(algorithm):
    """Check the first three single-episode updates against saved reference states.

    For each step the reference snapshot "test_state_<n>" is loaded into
    ``base``; the agent then learns one episode with a seeded RNG and must
    match the reference with no differing values (tolerance 0.001).
    ``algorithm`` is ALG_MC, ALG_TD, or 0 for both; ALG_QL is not supported.
    """
    if algorithm == ALG_QL:
        print("This test doesn't apply to Q values.")
        return

    tolerance = 0.001
    max_diffs = 0
    ai = Agent()

    for step in range(3):
        base.load(f"test_state_{step + 1}")
        print(f"Update update #{step + 1}:")

        for alg in (ALG_MC, ALG_TD):
            if algorithm in (0, alg):
                # Re-seed before every learning call so each run is reproducible.
                random.seed(step)
                ai_learn(ai, alg, 1)
                ai_compare(base, ai, alg, tolerance, max_diffs)

        print()
Exemple #4
0
    def __init__(self):
        """Create the game and agent, load the card images, and draw the first frame."""
        self.game = Game()
        self.agent = Agent()

        # All learning modes and autoplay start switched off.
        self.autoMC = self.autoTD = self.autoQL = self.autoPlay = False

        # One image per (rank, suit) entry of the module-level ``cards``.
        self.card_imgs = {
            (rank, suit): pygame.image.load(
                os.path.join('resources/cards/', f"{rank}_{suit}.png"))
            for (rank, suit) in cards
        }
        self.cBack = pygame.image.load('resources/cardback.png')

        self.init_display()
        self.render_board()
Exemple #5
0
def main():
    """Create the event manager and the components built around it, then run the spinner."""
    # Every component below is constructed with this same EventManager instance.
    ev_manager = EventManager()

    # NOTE(review): main_frame, keybd, ai and app are never used after
    # construction. Presumably each registers itself with ev_manager; confirm
    # how the manager holds its listeners before removing these bindings,
    # since dropping them could change object lifetimes.
    main_frame = MainFrame(ev_manager)
    spinner = CPUSpinnerController(ev_manager)
    keybd = KeyboardController(ev_manager)
    ai = Agent(ev_manager)
    app = App(ev_manager)

    # Control stays inside run() until the spinner returns.
    spinner.run()
Exemple #6
0
    def loop(self):
        """Run the interactive asteroid simulation until the window is closed.

        Start-up: previously recorded shot data is loaded from
        'sim/pickle_data/data.pck' (with fallbacks when the file is missing or
        unreadable), a Watcher is filled with four tracked slots, an Agent is
        fitted on the data, and the controllable player entity is registered.

        Every frame: pending pygame events are processed (quit, 'R' to
        restart, left click to pick a target), the agent is refitted once a
        minute, a bullet is fired towards the last clicked position, asteroids
        are spawned at most every half second, and the scene, FPS counter and
        kill ratio are redrawn.
        """
        def spawn_asteroid():
            """Register one asteroid at a random position on the top edge."""
            asteroid = self.register(
                Entity('sim/img/zybel.png',
                       self.screen,
                       self,
                       init_pos=(_random_pos()),
                       init_speed=np.asarray(
                           ((random() - .5) * .5, random() * .3 + .6)) * 3))
            # Rescale the sprite by a random factor between 1.0 and 1.4.
            self.entities[asteroid].appearance = pg.transform.scale(
                self.entities[asteroid].original,
                (np.asarray(self.entities[asteroid].original.get_size()) *
                 (random() * .4 + 1)).astype('uint8'))
            # self.entities[asteroid].set('on_deregister', spawn_asteroid)
            # NOTE(review): this tags the simulation itself, not the new
            # entity, while the spawner below counts entities whose ``type``
            # is 'asteroid' - confirm whether the entity was meant here.
            self.type = 'asteroid'
            return self.watcher.update(0, self.entities[asteroid].sim_id)

        def _radial(pos,
                    angle,
                    center=[x / 2 for x in self.res],
                    verbose=False):
            """Rotate ``pos`` around ``center`` by ``angle`` radians."""
            functions = [cos, sin]
            relational = pos - np.asarray(center)
            r = sqrt(sum((relational)**2))
            if verbose:
                print(r)
            curr_angle = atan2(relational[1], relational[0])

            return np.asarray([f(curr_angle + angle) * r
                               for f in functions]) + center

        def _get_angle(pos, center=[x / 2 for x in self.res]):
            """Return the angle of ``pos`` as seen from ``center``."""
            relational = pos - np.asarray(center)
            return atan2(relational[1], relational[0])

        def _random_pos():
            """Return a random point along the top edge (y == 0)."""
            return self.res[0] * random(), 0

        def sigmoid(x):
            """Logistic function 1 / (1 + e^-x)."""
            return 1 / (1 + exp(-x))

        # Load earlier shot records. An unreadable file yields an empty dict;
        # a file that cannot be opened yields a single placeholder record.
        # NOTE(review): ``load`` is presumably pickle.load - it must only be
        # pointed at trusted, locally produced files.
        data = None
        try:
            with open('sim/pickle_data/data.pck', 'rb+') as f:
                try:
                    data = load(f)
                except Exception:
                    data = {}
        except OSError:
            data = {
                '0': {
                    'shoot_angle': 0,
                    'ast_angle': 0,
                    'ast_vel': 0,
                    'hit': 1
                }
            }

        self.watcher = Watcher()
        # Asteroid sim_id
        self.watcher.objects.append(1)
        # Kill counter
        self.watcher.objects.append(0)
        # Asteroid velocity
        self.watcher.objects.append(0)
        # Sim data
        self.watcher.objects.append(data)

        pg.font.init()
        font = pg.font.SysFont('arial', 15)
        start_time = time()
        counter_time = time()
        learning_time = time()
        spawner_time = time()
        running = True
        agent = Agent().fit(data)

        char = self.register(
            Controllable('sim/img/img1.png',
                         self.screen,
                         self,
                         init_pos=(np.array((240, 420)))))

        scene = pg.image.load('sim/img/asteroids/scene.png')

        shoot_pos = None
        bullets_per_second = 3
        bullet_counter = 1

        while running:
            try:
                for event in pg.event.get():
                    if event.type == pg.QUIT:
                        running = False

                    elif event.type == pg.KEYDOWN:
                        if event.key == pg.K_r:
                            # Restart: run a fresh simulation to completion,
                            # then let this loop finish its current frame.
                            self = Sim(self.res)
                            self.loop()
                            running = False

                    elif event.type == pg.MOUSEBUTTONDOWN:
                        # Only the left button selects a target.
                        if pg.mouse.get_pressed()[0]:
                            shoot_pos = pg.mouse.get_pos()
                            print(shoot_pos)

            except SystemExit:
                running = False

            # AI: refit on the accumulated data once a minute.
            if time() - learning_time > 60:
                agent = Agent().fit(data)
                learning_time = time()

            if time() - counter_time > (1. / bullets_per_second) * 60 / fps:
                if shoot_pos:
                    counter_time = time()
                    # a = agent.predict(np.hstack([np.asarray([_get_angle(self.get_entity(self.watcher.get(asteroid)).get('pos')),
                    #                                          self.watcher.get(2)]).reshape(-1, 2)]))
                    # Unit vector from the player towards the clicked point.
                    shoot_vec = np.asarray(shoot_pos) - self.entities[char].pos
                    shoot_vec = shoot_vec / sqrt(sum(shoot_vec**2))
                    bullet_id = self.entities[char].fire(shoot_vec, 10)
                    data[self.entities[bullet_id].get('sim_id')] = {
                        'shoot_pos': shoot_vec,
                        'ast_pos': None,
                        'hit': 0
                    }
                    bullet_counter += 1
                    shoot_pos = None

            if time() - spawner_time > .5:
                spawner_time = time()
                if random() > .2:
                    # Allow one more asteroid per elapsed minute, capped at 8.
                    asteroid_count = len(
                        [1 for x in self.entities if x.type == 'asteroid'])
                    if asteroid_count < (time() - start_time) / 60 \
                            and asteroid_count < 8:
                        spawn_asteroid()

            self.clock.tick(fps)
            self.screen.fill(colors['white'])

            # Draw scene
            self.screen.blit(scene, (0, 0))

            # Draw FPS counter
            pg.draw.rect(self.screen, colors['black'], [400, 0, 480, 30])
            self.screen.blit(
                font.render(str(self.clock.get_fps()), 1, colors['white']),
                (400, 0))

            # Draw kill counter (watcher slot 1 divided by the bullet counter)
            pg.draw.rect(self.screen, colors['black'], [0, 0, 80, 30])
            self.screen.blit(
                font.render(str(self.watcher.get(1) * 1. / bullet_counter), 1,
                            colors['white']), (0, 0))

            self.sim()
            pg.display.flip()
Exemple #7
0
class GameRunner:
    def __init__(self):
        """Create the game and agent, load the card images, and draw the first frame."""
        self.game = Game()
        self.agent = Agent()

        # All learning modes and autoplay start switched off.
        self.autoMC = self.autoTD = self.autoQL = self.autoPlay = False

        # One image per (rank, suit) entry of the module-level ``cards``.
        self.card_imgs = {
            (rank, suit): pygame.image.load(
                os.path.join('resources/cards/', f"{rank}_{suit}.png"))
            for (rank, suit) in cards
        }
        self.cBack = pygame.image.load('resources/cardback.png')

        self.init_display()
        self.render_board()

    def init_display(self):
        """Start pygame, open the 640x480 window, and pre-render static labels and buttons."""
        pygame.init()
        self.screen = pygame.display.set_mode((640, 480))
        pygame.display.set_caption('Blackjack')
        self.font = pygame.font.SysFont("arial", 15)

        def label(text, color):
            return self.font.render(text, 1, color)

        def on_off_labels(prefix):
            # Index 0 is the "off" label, index 1 the "on" label.
            return [label(prefix + mode, BLUE) for mode in ("off", "on")]

        self.hitTxt = label('[H]it', BLACK)
        self.standTxt = label('[S]tand', BLACK)

        self.MCTxt = on_off_labels('[M]C - ')
        self.TDTxt = on_off_labels('[T]D - ')
        self.QLTxt = on_off_labels('[Q]L - ')
        self.playTxt = on_off_labels('[A]uto Play - ')

        # Index 0 is the win message, index 1 the loss message.
        self.gameoverTxt = [
            label('End of Round. You WON!', RED),
            label('End of Round. You LOST!', RED),
        ]

        self.ops_instr = label(
            'Click on the button or type the initial character of the operation to play or toggle modes',
            BLACK)
        self.save_instr = label('Press 1 to save AI state', BLACK)
        self.load_instr = label('Press 2 to load from AI\'s saved state', BLACK)

        # Window-sized background with the two always-white action buttons.
        canvas = pygame.Surface(self.screen.get_size()).convert()
        canvas.fill((0x00, 0x62, 0xbe))
        self.background = canvas
        self.hitB = pygame.draw.rect(canvas, WHITE,
                                     (10, OPS_BTN_Y, 75, OPS_BTN_HEIGHT))
        self.standB = pygame.draw.rect(canvas, WHITE,
                                       (95, OPS_BTN_Y, 75, OPS_BTN_HEIGHT))

    def loop(self):
        """Run the endless frame loop: learn, autoplay, handle input, redraw."""
        episodes_per_frame = 50

        while True:
            # Note: the state information does not account for the number of
            # cards held.

            # Each enabled learner trains for a short burst every frame.
            # MC and TD evaluate all states under the default policy (see
            # ai.py); Q-learning estimates the "Hit" and "Stand" values.
            if self.autoMC:
                self.agent.MC_run(episodes_per_frame)
            if self.autoTD:
                self.agent.TD_run(episodes_per_frame)
            if self.autoQL:
                self.agent.Q_run(episodes_per_frame)

            if self.autoPlay:
                # Record the finished round and deal a new one before acting.
                if self.game.game_over() or self.game.stand:
                    self.game.update_stats()
                    self.game.reset()

                decision = self.agent.autoplay_decision(
                    copy.deepcopy(self.game.state))
                # Decision 0 hits; any other value stands.
                act = self.game.act_hit if decision == 0 else self.game.act_stand
                act()

            self.handle_user_action()
            self.render_board()

    def check_act_MC(self, event):
        """Tell whether ``event`` is a click on the MC button or a press of the M key."""
        if event.type == MOUSEBUTTONDOWN:
            on_button = self.MCB.collidepoint(pygame.mouse.get_pos())
            if on_button:
                return on_button
        return event.type == KEYDOWN and event.key == K_m

    def check_act_TD(self, event):
        """Tell whether ``event`` is a click on the TD button or a press of the T key."""
        if event.type == MOUSEBUTTONDOWN:
            on_button = self.TDB.collidepoint(pygame.mouse.get_pos())
            if on_button:
                return on_button
        return event.type == KEYDOWN and event.key == K_t

    def check_act_QL(self, event):
        """Tell whether ``event`` is a click on the QL button or a press of the Q key."""
        if event.type == MOUSEBUTTONDOWN:
            on_button = self.QLB.collidepoint(pygame.mouse.get_pos())
            if on_button:
                return on_button
        return event.type == KEYDOWN and event.key == K_q

    def check_act_autoplay(self, event):
        """Tell whether ``event`` is a click on the autoplay button or a press of the A key."""
        if event.type == MOUSEBUTTONDOWN:
            on_button = self.playB.collidepoint(pygame.mouse.get_pos())
            if on_button:
                return on_button
        return event.type == KEYDOWN and event.key == K_a

    def check_act_hit(self, event):
        """Tell whether ``event`` is a valid manual "hit" request.

        A request is a click on the hit button or a press of the H key; it is
        rejected when the game is over or autoplay is active.
        """
        clicked = False
        if event.type == MOUSEBUTTONDOWN:
            clicked = self.hitB.collidepoint(pygame.mouse.get_pos())
        requested = clicked or (event.type == KEYDOWN and event.key == K_h)

        if self.game.game_over():
            return False
        if self.autoPlay:
            return False
        return requested

    def check_act_stand(self, event):
        """Tell whether ``event`` is a valid manual "stand" request.

        A request is a click on the stand button or a press of the S key; it
        is rejected when the game is over or autoplay is active.
        """
        clicked = False
        if event.type == MOUSEBUTTONDOWN:
            clicked = self.standB.collidepoint(pygame.mouse.get_pos())
        requested = clicked or (event.type == KEYDOWN and event.key == K_s)

        if self.game.game_over():
            return False
        if self.autoPlay:
            return False
        return requested

    def check_reset(self, event):
        """Tell whether ``event`` should start a new round.

        Any mouse click or key press counts, but only once the game is over
        and while autoplay is off.
        """
        finished = self.game.game_over()
        if not finished:
            return finished
        if self.autoPlay:
            return False
        return event.type in (MOUSEBUTTONDOWN, KEYDOWN)

    def handle_user_action(self):
        """Drain the pygame event queue and apply each event to the game.

        Handles window close and the X key (exit), the four mode toggles, the
        manual hit/stand/reset actions, and the 1 / 2 keys that save / load
        the agent state under the name "saved".
        """
        def shutdown():
            pygame.quit()
            sys.exit()

        # Clicking these buttons (or pressing their keys) starts or pauses the
        # corresponding learning process / autoplay.
        toggles = (
            (self.check_act_MC, 'autoMC'),
            (self.check_act_TD, 'autoTD'),
            (self.check_act_QL, 'autoQL'),
            (self.check_act_autoplay, 'autoPlay'),
        )

        for event in pygame.event.get():
            if event.type == QUIT:
                shutdown()

            for matches, flag in toggles:
                if matches(event):
                    setattr(self, flag, not getattr(self, flag))
                    break
            else:
                # No toggle matched: try the manual game actions in order.
                if self.check_act_hit(event):
                    self.game.act_hit()
                elif self.check_act_stand(event):
                    self.game.act_stand()
                elif self.check_reset(event):
                    self.game.update_stats()
                    self.game.reset()

            # Global keys are checked in addition to the actions above.
            if event.type == KEYDOWN:
                key = event.key
                if key == K_x:
                    shutdown()
                if key == K_1:
                    self.agent.save("saved")
                elif key == K_2:
                    self.agent.load("saved")

    @staticmethod
    def draw_label_hl(surface,
                      pos,
                      label,
                      padding=PADDING,
                      bg=WHITE,
                      wd=2,
                      border=True):
        """Draw a highlight box sized to surround ``label`` placed at ``pos``.

        The box extends ``padding`` pixels beyond the label on every side. It
        is filled with ``bg`` and, when ``border`` is true, outlined in BLACK
        with line width ``wd``.
        """
        box = (pos[0] - padding,
               pos[1] - padding,
               label.get_width() + padding * 2,
               label.get_height() + padding * 2)

        pygame.draw.rect(surface, bg, box, 0)
        if border:
            pygame.draw.rect(surface, BLACK, box, wd)

    def render_board(self):
        """Redraw the whole window: buttons, learning statistics, score, and cards."""
        game = self.game
        agent = self.agent
        screen = self.screen
        render = self.font.render

        # Score summary; the win rate is 0 until a round has been played.
        wins, losses = game.winNum, game.loseNum
        winTxt = render('Wins: {}'.format(wins), 1, WHITE)
        loseTxt = render('Losses: {}'.format(losses), 1, WHITE)
        win_rate = wins / (wins + losses) if (wins or losses) else 0.
        win_rate_txt = render('Win rate: {:.2f}%'.format(win_rate * 100), 1,
                              WHITE)

        def toggle_button(enabled, left, width):
            # Red while the mode is off, green while it is on.
            color = GREEN if enabled else RED
            return pygame.draw.rect(self.background, color,
                                    (left, OPS_BTN_Y, width, OPS_BTN_HEIGHT))

        self.MCB = toggle_button(self.autoMC, 180, 75)
        self.TDB = toggle_button(self.autoTD, 265, 75)
        self.QLB = toggle_button(self.autoQL, 350, 75)
        self.playB = toggle_button(self.autoPlay, 435, 115)

        # Text lines describing the agent's knowledge of the current state.
        state_info = render(
            'State (user_sum, user_has_Ace, dealer_first) ={}'.format(
                game.state), 1, BLACK)
        MCU = render(
            'Current state\'s (MC value, #samples): ({:f}, {})'.format(
                agent.MC_values[game.state], agent.N_MC[game.state]), 1, BLACK)
        TDU = render(
            'Current state\'s (TD value, #samples): ({:f}, {})'.format(
                agent.TD_values[game.state], agent.N_TD[game.state]), 1, BLACK)
        QV = render(
            'Current stats\'s Q values ([Hit, Stand], #samples): ([{:f},{:f}], {})'
            .format(
                agent.Q_values[game.state][0],
                agent.Q_values[game.state][1],
                agent.N_Q[game.state],
            ), 1, BLACK)

        # Background first, then the button captions on top of it.
        screen.blit(self.background, (0, 0))
        captions = (
            (self.hitTxt, 37),
            (self.standTxt, 113),
            (self.MCTxt[self.autoMC], 190),
            (self.TDTxt[self.autoTD], 277),
            (self.QLTxt[self.autoQL], 359),
            (self.playTxt[self.autoPlay], 444),
        )
        for caption, left in captions:
            screen.blit(caption, (left, OPS_TXT_Y))
        screen.blit(self.ops_instr, (OPS_INSTR_X, OPS_INSTR_Y))

        # White info panel with a black outline, holding four text rows.
        info_box = (10, 170, 600, 95)
        pygame.draw.rect(screen, WHITE, info_box, 0)
        pygame.draw.rect(screen, BLACK, info_box, 2)
        for row, line in enumerate((state_info, MCU, TDU, QV)):
            screen.blit(line, (20, 180 + 20 * row))

        for row, line in enumerate((winTxt, loseTxt, win_rate_txt)):
            screen.blit(line, (520, 23 + 25 * row))

        screen.blit(self.save_instr, (350, 380))
        screen.blit(self.load_instr, (350, 400))

        for i, card in enumerate(game.userCard):
            screen.blit(self.card_imgs[card], (10 + i * 20, USR_CARD_HEIGHT))

        round_done = game.game_over() or game.stand
        if not round_done:
            # Mid-round: show only the dealer's first card next to a card back.
            screen.blit(self.card_imgs[game.dealCard[0]], (10, 10))
            screen.blit(self.cBack, (30, 10))
        else:
            result_txt = self.gameoverTxt[0 if game.state == WIN_STATE else 1]
            self.draw_label_hl(screen, GAME_OVER_TEXT_POS, result_txt)
            screen.blit(result_txt, GAME_OVER_TEXT_POS)
            # Reveal the dealer's full hand.
            for i, card in enumerate(game.dealCard):
                screen.blit(self.card_imgs[card], (10 + i * 20, 10))

        pygame.display.update()
Exemple #8
0
            if abs(ai.Q_values[state][0] -
                   ai_base.Q_values[state][0]) > tolerance:
                diff += 1
            if abs(ai.Q_values[state][1] -
                   ai_base.Q_values[state][1]) > tolerance:
                diff += 1

    if diff <= max_diffs:
        print("++++ PASSED {} with {} wrong values".format(
            ALG_TXT[algorithm], diff))
    else:
        print("---- FAILED {} with {} wrong values".format(
            ALG_TXT[algorithm], diff))


# Module-level reference agent; test_three_steps() loads saved snapshots into
# it with base.load(...).
base = Agent()


def test_three_steps(algorithm):
    """Walk through three saved "test_state_<n>" snapshots, loading each into ``base``.

    Prints a notice and returns immediately when ``algorithm`` is ALG_QL.
    """

    if algorithm == ALG_QL:
        print("This test doesn't apply to Q values.")
        return

    # NOTE(review): tolerance, max_diffs and ai are not used in the lines
    # visible here; the rest of this function appears to be cut off in this
    # excerpt - confirm against the full file.
    tolerance = 0.001
    max_diffs = 0
    ai = Agent()

    for step in range(0, 3):
        # Snapshots are numbered from 1, hence step + 1.
        base.load(f"test_state_{step + 1}")
        print(f"Update update #{step + 1}:")
Exemple #9
0
from kivy.properties import NumericProperty, ReferenceListProperty, ObjectProperty

from ai import Agent  # AI using Double DQN with Prioritized Experience Replay

# Add this line if we don't want a right click to draw a red point
Config.set('input', 'mouse', 'mouse,multitouch_on_demand')

# last_x and last_y keep the last point in memory while we draw the sand on the map
last_x = 0
last_y = 0
length = 0  # the length of the last drawing
n_points = 0  # the total number of points in the last drawing

# Create our AI, which we call 'agent'; it contains the neural network that represents our Q-function
agent = Agent(
    input_sz=5, output_sz=3,
    hidden_layer_sizes=[100])  # 5 inputs (dimensionality), 3 outputs (actions)
TEMPERATURE = 50

# One rotation value per action index (0, 1, 2)
action2rotation = [0, 20, -20]
scores = []  # sliding window of rewards with respect to time
last_reward = 0
last_signal = [0, 0, 0, 0, 0]
last_distance = 0

# Reward constants
PUNISHMENT = -6.0
GOAL_REWARD = 2.0
STEP_COST = -0.3
CLOSER_REWARD = 0.2
Exemple #10
0
else:
    print(f("{v} isn't a valid player, please use 'X' or 'O'"))
    print("Using default...")
    player = Player.X

print("Player starting" if player == Player.X else "Player second")

# Use precalculated policy from file.
# If file not present, find the policy via value iteration.
try:
    print("Reading the AI policy")
    if player == player.X:
        file = open("policy_O.pkl", "rb")
    else:
        file = open("policy_X.pkl", "rb")
    policy = pickle.load(file)
    ai = Agent(game, policy)
except:
    print("Failed to read the AI policy, calculating it now...")
    print("To precalculate and save the policy run valueiteration.py")
    ai = ReinforcementAgent(game, ~player)
print("Ready to play!!!")

assert (len(sys.argv) == 2)

# TODO : Add possibility for player to play as O
while True:
    if not game.finished() and game.player is ~player:
        ai.play()
    handle_events(game)
    draw_board(game.board)
Exemple #11
0
from ai import Agent

# Create an agent through Agent.create() and print it.
ego = Agent.create()

# Call form of print: valid on both Python 2 and Python 3.
print(ego)



Exemple #12
0
class GameRunner:
    def __init__(self):
        """Build the game, the betting agent, the card images, and the first frame.

        When LOAD_AI is set the agent state is loaded from AI_FILE; when
        FAST_LEARN is set the agent runs LEARN_ITERATIONS rounds of Q-learning
        before the window is opened.
        """
        self.game = Game()
        self.agent = Agent(SMART_BET)
        self.splitted_games = []

        # Optional warm-up steps, driven by configuration flags.
        if LOAD_AI:
            print("Loading AI knowledge..")
            self.agent.load(AI_FILE)
        if FAST_LEARN:
            print(f"Learning {LEARN_ITERATIONS} times..")
            self.agent.Q_run(LEARN_ITERATIONS, True)

        self.autoQL = False
        self.autoPlay = False
        self.action = HIT
        self.split_hand = 0

        # Card faces are keyed by their (rank, suit) pair.
        images = {}
        for rank in RANKS:
            for suit in SUITS:
                image_file = os.path.join('resources/cards/',
                                          f"{rank}_{suit}.png")
                images[(rank, suit)] = pygame.image.load(image_file)
        self.card_imgs = images
        self.card_back_img = pygame.image.load('resources/cardback.png')

        self.init_display()
        self.render_board()

    def init_display(self):
        """Start pygame, open the window, and pre-render static labels and buttons."""
        pygame.init()
        self.screen = pygame.display.set_mode(WINDOW_SIZE)
        pygame.display.set_caption('Blackjack')
        self.font = pygame.font.SysFont("arial", 15)

        def label(text, color=BLACK):
            return self.font.render(text, 1, color)

        def on_off_labels(prefix):
            # Index 0 is the "OFF" label, index 1 the "ON" label.
            return [label(prefix + mode, BLUE) for mode in ("OFF", "ON")]

        self.hit_txt = label('[H]it')
        self.stand_txt = label('[S]tand')
        self.double_txt = label('[D]ouble')
        self.split_txt = label('Spli[t]')
        self.split_hand_txt = label('Split: ' + str(self.split_hand))

        self.QL_txt = on_off_labels('[Q]L - ')
        self.autoplay_txt = on_off_labels('[A]uto Play - ')

        # End-of-round messages, in order: win, loss, blackjack win, draw.
        self.gameover_txt = [
            label(message, RED)
            for message in ('You WON!', 'You LOST!', 'You WON!\nBLACKJACK!',
                            'DRAW!')
        ]

        self.ops_instr = label(
            'Click on the button or press corresponding key to play')
        self.save_instr = label('Press 1 to save AI state')
        self.load_instr = label('Press 2 to load AI state')

        # Window-sized grey background carrying the four white action buttons.
        canvas = pygame.Surface(self.screen.get_size()).convert()
        canvas.fill((0xa0, 0xa0, 0xa0))
        self.background = canvas

        def white_button(left):
            return pygame.draw.rect(canvas, WHITE,
                                    (left, OPS_BTN_Y, 75, OPS_BTN_HEIGHT))

        self.hit_btn = white_button(10)
        self.stand_btn = white_button(95)
        self.double_btn = white_button(180)
        self.split_btn = white_button(265)

    def loop(self):
        """Run the optional fast simulation, then the endless interactive loop.

        With FAST_SIM set, SIM_ITERATIONS games are first played by the agent
        without rendering. Progress is printed along the way, and the profit
        made since the previous checkpoint is appended to "results.txt" each
        time the game counter reaches a multiple of GAMES_PER_STAT_TRACK.

        The interactive loop then runs forever: it performs Q-learning and/or
        autoplay when enabled, handles user input, and redraws the board only
        when something changed.
        """
        if FAST_SIM:
            print(f"Simulating {SIM_ITERATIONS} times..")
            i = 0
            last_checked_profit = 0
            progress_step = SIM_ITERATIONS / 10

            # The context manager closes the results file even if a game step
            # raises part-way through the simulation.
            with open("results.txt", "w") as results_file:
                while i < SIM_ITERATIONS:
                    if self.game.is_game_over() or self.game.stand:
                        self.game.update_stats()
                        self.next_game()

                        i += 1
                        if i % progress_step == 0:
                            print("[ ", i * 100 / SIM_ITERATIONS, "%]")
                        if (self.game.num_games % GAMES_PER_STAT_TRACK == 0
                                and self.game.num_games != 0):
                            results_file.write(
                                str(self.game.profit - last_checked_profit))
                            results_file.write("\n")
                            last_checked_profit = self.game.profit
                    else:
                        self._apply_decision(
                            self.agent.autoplay_decision(
                                self.game.state, self.game.can_double(),
                                self.game.can_split()))

            self.render_board()

        while True:
            update_display = False

            # State information does not take into account of number of cards
            if self.autoQL:
                # Q-Learning
                update_display = True
                self.agent.Q_run(5)

            if self.autoPlay:
                update_display = True
                if self.game.is_game_over():
                    self.game.update_stats()
                    self.next_game()
                else:
                    self._apply_decision(
                        self.agent.autoplay_decision(
                            copy.deepcopy(self.game.state),
                            self.game.can_double(), self.game.can_split()))

            if self.handle_user_action() or update_display:
                self.render_board()

    def _apply_decision(self, decision):
        """Carry out one agent decision (HIT, STAND, DOUBLE or SPLIT); ignore anything else."""
        if decision == HIT:
            self.game.act_hit()
        elif decision == STAND:
            self.game.act_stand()
        elif decision == DOUBLE:
            self.game.act_double()
        elif decision == SPLIT:
            self.split_games()

    def split_games(self):
        """Split the current hand into two games and deal one card to each.

        The second game is a deep copy of the current one, queued on
        ``splitted_games``. When the first player card's rank is ACE and
        ACE_SPLIT_DEAL_ONE_CARD is set, both hands stand immediately.
        """
        self.split_hand = 1
        self.game.act_split()

        twin = copy.deepcopy(self.game)
        self.splitted_games.append(twin)
        hands = (self.game, twin)

        for hand in hands:
            hand.act_hit()

        if self.game.player_cards[0][0] == ACE and ACE_SPLIT_DEAL_ONE_CARD:
            for hand in hands:
                hand.act_stand()

    def next_game(self):
        """Move on to the next pending split hand, or start a fresh game with a new bet."""
        # Queued split hands are played before a new game is dealt.
        if self.splitted_games:
            self.next_split_hand()
            return

        self.split_hand = 0
        self.game.reset_game()
        self.game.bet = self.agent.calculate_bet_amount(self.game.true_count)

    def next_split_hand(self):
        """Switch play to the most recently queued split hand.

        The queued game is first synced with the current game, then it
        becomes the current game and is removed from the queue.
        """
        self.split_hand += 1
        self.splitted_games[-1].sync(self.game)
        self.game = self.splitted_games.pop()

    def check_act_QL(self, event):
        """Tell whether ``event`` is a click on the QL button or a press of the Q key."""
        if event.type == MOUSEBUTTONDOWN:
            on_button = self.QL_btn.collidepoint(pygame.mouse.get_pos())
            if on_button:
                return on_button
        return event.type == KEYDOWN and event.key == K_q

    def check_act_autoplay(self, event):
        """Tell whether ``event`` is a click on the autoplay button or a press of the A key."""
        if event.type == MOUSEBUTTONDOWN:
            on_button = self.autoplay_btn.collidepoint(pygame.mouse.get_pos())
            if on_button:
                return on_button
        return event.type == KEYDOWN and event.key == K_a

    def check_act_hit(self, event):
        """Tell whether ``event`` is a valid manual "hit" request.

        A request is a click on the hit button or a press of the H key; it is
        rejected when the game is over or autoplay is active.
        """
        clicked = False
        if event.type == MOUSEBUTTONDOWN:
            clicked = self.hit_btn.collidepoint(pygame.mouse.get_pos())
        requested = clicked or (event.type == KEYDOWN and event.key == K_h)

        if self.game.is_game_over():
            return False
        if self.autoPlay:
            return False
        return requested

    def check_act_stand(self, event):
        """Tell whether ``event`` is a valid manual "stand" request.

        A request is a click on the stand button or a press of the S key; it
        is rejected when the game is over or autoplay is active.
        """
        clicked = False
        if event.type == MOUSEBUTTONDOWN:
            clicked = self.stand_btn.collidepoint(pygame.mouse.get_pos())
        requested = clicked or (event.type == KEYDOWN and event.key == K_s)

        if self.game.is_game_over():
            return False
        if self.autoPlay:
            return False
        return requested

    def check_act_double(self, event):
        """Tell whether ``event`` is a valid manual "double" request.

        A request is a click on the double button or a press of the D key; it
        is rejected when the game is over or autoplay is active.
        """
        clicked = False
        if event.type == MOUSEBUTTONDOWN:
            clicked = self.double_btn.collidepoint(pygame.mouse.get_pos())
        requested = clicked or (event.type == KEYDOWN and event.key == K_d)

        if self.game.is_game_over():
            return False
        if self.autoPlay:
            return False
        return requested

    def check_act_split(self, event):
        """Tell whether ``event`` is a valid manual "split" request.

        A request is a click on the split button or a press of the T key; it
        is rejected when the game is over or autoplay is active.
        """
        clicked = False
        if event.type == MOUSEBUTTONDOWN:
            clicked = self.split_btn.collidepoint(pygame.mouse.get_pos())
        requested = clicked or (event.type == KEYDOWN and event.key == K_t)

        if self.game.is_game_over():
            return False
        if self.autoPlay:
            return False
        return requested

    def check_reset(self, event):
        """Tell whether ``event`` should start the next game.

        Any mouse click or key press counts, but only once the game is over
        and while autoplay is off.
        """
        finished = self.game.is_game_over()
        if not finished:
            return finished
        if self.autoPlay:
            return False
        return event.type in (MOUSEBUTTONDOWN, KEYDOWN)

    # Return whether to change display
    def handle_user_action(self):
        """Drain the pygame event queue and apply every pending user action.

        Each event is first matched against the game actions (QL toggle,
        autoplay toggle, hit, stand, double, split, reset); the first
        matching ``check_*`` predicate wins. Independently of that chain,
        key presses are also checked for the utility shortcuts:

        * ``x`` - quit the program
        * ``1`` - save the agent to ``AI_FILE``
        * ``2`` - load the agent from ``AI_FILE``
        * ``p`` - print the game's counts
        * ``v`` - print the agent's decision values

        Returns:
            bool: ``True`` when at least one handled event changed something
            that requires the board to be redrawn, ``False`` otherwise
            (including when the queue was empty).
        """
        update_display = False
        for event in pygame.event.get():
            if event.type == QUIT:
                pygame.quit()
                sys.exit()
            elif self.check_act_QL(event):
                self.autoQL = not self.autoQL
            elif self.check_act_autoplay(event):
                self.autoPlay = not self.autoPlay
            elif self.check_act_hit(event):
                self.game.act_hit()
                update_display = True
            elif self.check_act_stand(event):
                self.game.act_stand()
                update_display = True
            elif self.check_act_double(event):
                # The request is silently ignored when doubling is not legal.
                if self.game.can_double():
                    self.game.act_double()
                    update_display = True
            elif self.check_act_split(event):
                # The request is silently ignored when splitting is not legal.
                if self.game.can_split():
                    self.split_games()
                    update_display = True
            elif self.check_reset(event):
                self.game.update_stats()
                self.next_game()
                update_display = True

            # Utility shortcuts are evaluated in addition to the chain above,
            # so a key press can both trigger an action and a shortcut.
            if event.type == KEYDOWN:
                if event.key == K_x:
                    pygame.quit()
                    sys.exit()
                if event.key == K_1:
                    self.agent.save(AI_FILE)
                elif event.key == K_2:
                    self.agent.load(AI_FILE)
                    update_display = True
                if event.key == K_p:
                    self.game.print_counts()
                if event.key == K_v:
                    self.agent.print_decision_value()

        # Return only after the whole batch has been processed:
        # pygame.event.get() removes every queued event, so returning from
        # inside the loop would discard all events after the first one.
        return update_display

    @staticmethod
    def draw_label_hl(surface,
                      pos,
                      label,
                      padding=PADDING,
                      bg=WHITE,
                      wd=2,
                      border=True):
        """Draw a highlight box sized to fit ``label`` at ``pos``.

        The box is the label's bounding rectangle grown by ``padding`` on
        every side. It is filled with ``bg`` and, when ``border`` is true,
        outlined in black with a line width of ``wd``. The label itself is
        not drawn here.
        """
        left, top = pos[0] - padding, pos[1] - padding
        box = (left, top, label.get_width() + padding * 2,
               label.get_height() + padding * 2)

        # Width 0 asks pygame for a filled rectangle.
        pygame.draw.rect(surface, bg, box, 0)
        if border:
            pygame.draw.rect(surface, BLACK, box, wd)

    def render_board(self):
        """Redraw the complete table and push it to the display.

        Renders the statistics texts, repaints the QL/autoplay toggle buttons
        on the background (storing their rects for hit-testing), asks the
        agent for its recommended action, then blits background, labels,
        info panel, statistics and cards onto the screen.
        """
        game = self.game
        screen = self.screen

        def white(text):
            return self.font.render(text, 1, WHITE)

        def black(text):
            return self.font.render(text, 1, BLACK)

        true_count_label = white('True count: {}'.format(game.true_count))
        bet_label = white('Bet: {}'.format(game.bet))

        # Game Stats (all rates are 0 until the first game is recorded)
        if game.num_games == 0:
            blackjack_rate = win_rate = draw_rate = lose_rate = 0
        else:
            blackjack_rate = game.num_blackjacks / game.num_games
            win_rate = game.num_wins / game.num_games
            draw_rate = game.num_draws / game.num_games
            lose_rate = game.num_losses / game.num_games

        game_stat_labels = [
            white('Number of games: {}'.format(game.num_games)),
            white('Blackjack rate: {:.2f}%'.format(blackjack_rate * 100)),
            white('Win rate: {:.2f}%'.format(win_rate * 100)),
            white('Draw rate: {:.2f}%'.format(draw_rate * 100)),
            white('Loss rate: {:.2f}%'.format(lose_rate * 100)),
        ]

        # Bank Stats
        bank_stat_labels = [
            white('Amount played: {}'.format(game.amount_played)),
            white('Max win: {}'.format(game.max_profit)),
            white('Max loss: {}'.format(game.max_loss)),
            white('Max continuous loss: {}'.format(game.max_loss_streak)),
            white('Max continuous win: {}'.format(game.max_win_streak)),
            white('Profit: {}'.format(game.profit)),
        ]

        self.split_hand_txt = black('Split: ' + str(self.split_hand))

        # Toggle buttons: red when off, green when on. They are painted on
        # the background, so this must happen before the background is
        # blitted to the screen below.
        toggle_colors = (RED, GREEN)
        self.QL_btn = pygame.draw.rect(
            self.background, toggle_colors[self.autoQL],
            (350, OPS_BTN_Y, 75, OPS_BTN_HEIGHT))
        self.autoplay_btn = pygame.draw.rect(
            self.background, toggle_colors[self.autoPlay],
            (435, OPS_BTN_Y, 115, OPS_BTN_HEIGHT))

        state_label = black(
            'State (player_sum, player_has_Ace, dealer_first) ={}'.format(
                game.state))

        q_row = self.agent.Q_values[game.state]
        q_label = black(
            'Current stats\'s Q values ([Hit, Stand], #samples): ([{:f},{:f}], {})'
            .format(q_row[0], q_row[1], self.agent.N_Q[game.state]))

        # Translate the agent's decision into a display name; an unknown
        # decision leaves the previously shown action untouched.
        decision = self.agent.autoplay_decision(game.state, game.can_double(),
                                                game.can_split())
        for code, name in ((HIT, "HIT"), (STAND, "STAND"),
                           (DOUBLE, "DOUBLE"), (SPLIT, "SPLIT")):
            if decision == code:
                self.action = name
                break

        strategy_label = black('Recommended action: {}'.format(self.action))

        # Background and operation labels.
        screen.blit(self.background, (0, 0))
        for surface, where in (
            (self.hit_txt, (37, OPS_TXT_Y)),
            (self.stand_txt, (113, OPS_TXT_Y)),
            (self.double_txt, (190, OPS_TXT_Y)),
            (self.split_txt, (270, OPS_TXT_Y)),
            (self.QL_txt[self.autoQL], (359, OPS_TXT_Y)),
            (self.autoplay_txt[self.autoPlay], (444, OPS_TXT_Y)),
            (self.ops_instr, (OPS_INSTR_X, OPS_INSTR_Y)),
        ):
            screen.blit(surface, where)

        # Agent info panel: white fill with a black outline.
        panel = (10, 170, 600, 95)
        pygame.draw.rect(screen, WHITE, panel, 0)
        pygame.draw.rect(screen, BLACK, panel, 2)
        screen.blit(state_label, (20, 180))
        screen.blit(q_label, (20, 220))
        screen.blit(strategy_label, (20, 240))

        screen.blit(bet_label, (200, 10))
        screen.blit(true_count_label, (200, 30))

        # Statistic columns: one line every 20 pixels starting at y=10.
        for row, surface in enumerate(game_stat_labels):
            screen.blit(surface, (350, 10 + 20 * row))
        for row, surface in enumerate(bank_stat_labels):
            screen.blit(surface, (500, 10 + 20 * row))

        screen.blit(self.split_hand_txt, (350, 300))
        screen.blit(self.save_instr, (350, 380))
        screen.blit(self.load_instr, (350, 400))

        # Player cards, fanned out 20 pixels apart.
        for slot, card in enumerate(game.player_cards):
            screen.blit(self.card_imgs[card],
                        (10 + slot * 20, USR_CARD_HEIGHT))

        if game.is_game_over() or game.stand:
            # Pick the banner for the outcome; anything that is not a win,
            # blackjack or draw falls back to entry 1.
            outcomes = ((STATE_WIN, 0), (STATE_BLACKJACK, 2), (STATE_DRAW, 3))
            banner_idx = next(
                (idx for state, idx in outcomes if game.state == state), 1)
            result_txt = self.gameover_txt[banner_idx]
            self.draw_label_hl(screen, GAME_OVER_TEXT_POS, result_txt)
            screen.blit(result_txt, GAME_OVER_TEXT_POS)

            # Reveal the dealer's full hand.
            for slot, card in enumerate(game.dealer_cards):
                screen.blit(self.card_imgs[card], (10 + slot * 20, 10))
        else:
            # Hand still in progress: show the dealer's first card and a
            # face-down card next to it.
            screen.blit(self.card_imgs[game.dealer_cards[0]], (10, 10))
            screen.blit(self.card_back_img, (30, 10))

        pygame.display.update()
    def loop(self):
        """Run the shooting simulation until the window is closed.

        Loads previously recorded shot data from
        ``sim/pickle_data/data_zybel.pck`` (falling back to a seed record),
        fits an ``Agent`` on it, then enters the frame loop: handle window
        events, periodically refit the agent, periodically fire a bullet at
        the angle the agent predicts, draw the helper overlay, rotate the
        asteroid around the screen centre and step the simulation. When the
        loop ends the (extended) shot data is written back to the same file.

        NOTE(review): ``fps`` and ``colors`` are not defined in this method;
        presumably module-level globals - confirm.
        """

        def spawn_asteroid():
            """Register a new asteroid entity and record it in the watcher.

            Returns whatever ``self.watcher.update`` returns for slot 0.
            NOTE(review): callers pass that value to ``self.watcher.get``,
            so it is presumably the slot index - confirm against Watcher.
            """
            asteroid = self.register(
                Entity('sim/img/zybel.png',
                       self.screen,
                       self,
                       init_pos=(_random_pos())))
            # Debug output: distance of the new asteroid from (240, 240).
            print(
                sqrt(sum(
                    (self.entities[asteroid].get('pos') - (240, 240))**2)))
            # Hand this function to the entity as its 'on_deregister' value;
            # presumably it is invoked when the asteroid is removed, so a
            # replacement spawns - confirm in Entity.
            self.entities[asteroid].set('on_deregister', spawn_asteroid)
            # Slot 1 (kill counter) is incremented on every spawn, including
            # the very first one.
            self.watcher.update(1, self.watcher.get(1) + 1)
            # Slot 2 (asteroid velocity): magnitude 0.4, random sign.
            self.watcher.update(2, .4 * (1 if random() > .5 else -1))
            return self.watcher.update(0, self.entities[asteroid].sim_id)

        # NOTE: the ``center`` defaults below are evaluated once, when these
        # nested functions are defined (i.e. on each call of ``loop``), from
        # the ``self.res`` of that moment.
        def _radial(pos,
                    angle,
                    center=[x / 2 for x in self.res],
                    verbose=False):
            """Rotate ``pos`` around ``center`` by ``angle`` radians.

            The distance to ``center`` is preserved; when ``verbose`` is set
            that distance is printed.
            """
            functions = [cos, sin]
            relational = pos - np.asarray(center)
            r = sqrt(sum((relational)**2))
            if verbose:
                print(r)
            curr_angle = atan2(relational[1], relational[0])

            return np.asarray([f(curr_angle + angle) * r
                               for f in functions]) + center

        def _get_angle(pos, center=[x / 2 for x in self.res]):
            """Return the ``atan2`` angle (radians) of ``pos`` seen from ``center``."""
            relational = pos - np.asarray(center)
            return atan2(relational[1], relational[0])

        def _random_pos():
            """Return a random point on a radius-120 arc around (240, 240).

            ``x`` is drawn from [0, 120) and ``y`` is chosen so that the
            offset has length 120, so both offsets are non-negative.
            """
            x = random() * 120
            y = sqrt(120**2 - x**2)

            return x + 240, y + 240

        # NOTE(review): ``sigmoid`` is not referenced anywhere else in the
        # visible body of this method.
        def sigmoid(x):
            """Return the logistic function 1 / (1 + e^-x)."""
            return 1 / (1 + exp(-x))

        # Load the recorded shots. If the file opens but cannot be loaded,
        # start from an empty dict; if it cannot be opened at all, start from
        # a single seed record.
        # NOTE(review): both handlers are bare ``except:`` and therefore also
        # swallow unrelated errors (including KeyboardInterrupt).
        # NOTE(review): ``load``/``dump`` are presumably pickle's - only use
        # on trusted files.
        data = None
        try:
            with open('sim/pickle_data/data_zybel.pck', 'rb+') as f:
                try:
                    data = load(f)
                except:
                    data = {}
        except:
            data = {
                '0': {
                    'shoot_angle': 0,
                    'ast_angle': 0,
                    'ast_vel': 0,
                    'hit': 1
                }
            }
        # NOTE(review): not read again in the visible body.
        data_init_len = len(data)

        # The watcher holds the shared simulation values; each append below
        # supplies the initial value for the item named in its comment.
        self.watcher = Watcher()
        # Asteroid sim_id
        self.watcher.objects.append(1)
        # Kill counter
        self.watcher.objects.append(0)
        # Asteroid velocity
        self.watcher.objects.append(0)
        # Sim data
        self.watcher.objects.append(data)

        pg.font.init()
        font = pg.font.SysFont('arial', 15)
        # NOTE(review): ``start_time`` is not read again in the visible body.
        start_time = time()
        counter_time = time()   # time of the last shot
        learning_time = time()  # time of the last refit
        running = True
        # assumes ``fit`` returns the fitted agent (``predict`` is called on
        # the result) - TODO confirm
        agent = Agent().fit(data)

        char = self.register(
            Controllable('sim/img/img1.png',
                         self.screen,
                         self,
                         init_pos=(np.array((240, 240)))))

        asteroid = spawn_asteroid()

        bullets_per_second = 1
        # Starts at 1 (not 0), which keeps the ratio drawn below well defined
        # before the first shot.
        bullet_counter = 1

        while running:
            try:
                for event in pg.event.get():
                    if event.type == pg.QUIT:
                        running = False

                    elif event.type == pg.KEYDOWN:
                        if event.key == pg.K_r:
                            # Restart: rebinds the local name ``self`` to a
                            # fresh Sim and runs its loop nested inside this
                            # one. NOTE(review): after the nested loop
                            # returns, the rest of this iteration still runs
                            # against the new Sim before this loop exits.
                            self = Sim(self.res)
                            self.loop()
                            running = False

            except SystemExit:
                running = False

            # AI
            # Refit a fresh agent on all recorded shots every 5 seconds.
            if time() - learning_time > 5:
                agent = Agent().fit(data)
                learning_time = time()

            # Fire once the shot interval has elapsed.
            if time() - counter_time > (1. / bullets_per_second) * 60 / fps:
                counter_time = time()
                # Features: one row of [asteroid angle, asteroid velocity].
                a = agent.predict(
                    np.hstack([
                        np.asarray([
                            _get_angle(
                                self.get_entity(
                                    self.watcher.get(asteroid)).get('pos')),
                            self.watcher.get(2)
                        ]).reshape(-1, 2)
                    ]))
                # a = random() * 3.141 * 2
                bullet_id = self.entities[char].fire(a, 3.5)
                # Record the shot as a miss ('hit': 0); presumably updated
                # elsewhere when the bullet hits - confirm.
                data[self.entities[bullet_id].get('sim_id')] = {
                    'shoot_angle':
                    a,
                    'ast_angle':
                    _get_angle(
                        self.get_entity(
                            self.watcher.get(asteroid)).get('pos')),
                    'ast_vel':
                    self.watcher.get(2),
                    'hit':
                    0
                }
                bullet_counter += 1

            self.clock.tick(fps)
            self.screen.fill(colors['white'])

            # Draw helper lines
            pg.draw.line(self.screen, colors['black'], [0, 240], [480, 240])
            pg.draw.line(self.screen, colors['black'], [240, 0], [240, 480])
            pg.draw.circle(self.screen, colors['black'], [240, 240], 136, 2)

            # Draw angles
            # Labels at the four axis ends, converted from radians to degrees
            # with 3.141 as the approximation of pi.
            self.screen.blit(
                font.render(str(int(_get_angle((240, 0)) * 360 / 3.141 / 2)),
                            1, colors['black']), (235, 10))
            self.screen.blit(
                font.render(str(int(_get_angle((240, 480)) * 360 / 3.141 / 2)),
                            1, colors['black']), (235, 460))
            self.screen.blit(
                font.render(str(int(_get_angle((0, 240)) * 360 / 3.141 / 2)),
                            1, colors['black']), (10, 240))
            self.screen.blit(
                font.render(str(int(_get_angle((480, 240)) * 360 / 3.141 / 2)),
                            1, colors['black']), (460, 220))

            # Draw FPS counter
            pg.draw.rect(self.screen, colors['black'], [400, 0, 480, 30])
            self.screen.blit(
                font.render(str(self.clock.get_fps()), 1, colors['white']),
                (400, 0))

            # Draw kill counter
            # The value shown is watcher slot 1 divided by bullet_counter,
            # i.e. a ratio rather than a raw count.
            pg.draw.rect(self.screen, colors['black'], [0, 0, 80, 30])
            self.screen.blit(
                font.render(str(self.watcher.get(1) * 1. / bullet_counter), 1,
                            colors['white']), (0, 0))

            # Move asteroid
            # Rotate it around the screen centre by (2*pi/30) * velocity
            # radians per frame; the sign of the velocity sets the direction.
            self.get_entity(self.watcher.get(asteroid)).set(
                'pos',
                _radial(
                    self.get_entity(self.watcher.get(asteroid)).get('pos'),
                    2 * 3.141 / 30 * self.watcher.get(2)))

            self.sim()
            pg.display.flip()

        # Persist every recorded shot (old and new) for the next run.
        with open('sim/pickle_data/data_zybel.pck', 'wb+') as f:
            print('Dumping')
            dump(data, f)