Example 1
# WorldModel is assumed to be imported from this project's own modules.
def copy(wm):
    """Return a field-by-field copy of a WorldModel's 8x8 board and names."""
    wm_copy = WorldModel()
    for i in range(8):
        for j in range(8):
            wm_copy.board[i][j].is_empty = wm.board[i][j].is_empty
            wm_copy.board[i][j].is_white = wm.board[i][j].is_white
    wm_copy.white_team_name = wm.white_team_name
    wm_copy.black_team_name = wm.black_team_name
    wm_copy.my_color = wm.my_color
    return wm_copy
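A minimal usage sketch, assuming WorldModel() builds its own 8x8 grid of cell objects (so the copy shares no cells with the original):

wm = WorldModel()
clone = copy(wm)
clone.board[0][0].is_empty = False   # mutate the copy...
print(wm.board[0][0].is_empty)       # ...the original cell is unchanged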
Example 2
# GUI, WorldModel and OnExitException are assumed to be imported from this
# project's own modules.
class Engine(object):
    def __init__(self, enableAiPlayer):
        self.gui = GUI(enableAiPlayer=enableAiPlayer)
        self.wm = WorldModel()
        self.gui.set_world_model(self.wm)
        self.running = True

    def run(self):
        while self.running:  # the flag is cleared when the GUI reports an exit
            self.gui.draw()

            action = None
            try:
                action = self.gui.get_action()
            except OnExitException:
                self.running = False
                self.gui.close()

            self.wm.update(action)
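A hypothetical entry point for this engine; only the Engine class and its enableAiPlayer parameter come from the example above:

if __name__ == '__main__':
    engine = Engine(enableAiPlayer=True)
    engine.run()  # draw, poll the GUI for an action, update the world model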
Example 3
import logging
import os
import pickle
import random

# config, GUI, WorldModel and ActionType are assumed to be imported from
# this project's own modules.


class LearningEngine:
    def __init__(self, episodeNum, enableGui=False):
        self.episode_num = episodeNum
        self.is_gui_enable = enableGui
        if self.is_gui_enable:
            self.gui = GUI(enableGui)
        self.wm = WorldModel()
        if self.is_gui_enable:
            self.gui.set_world_model(self.wm)

        self.Q = {}
        self.num_of_is_near_wall = 0
        self.num_of_success_repeat = 0
        self.num_of_pointless_tries = 0

        self.load_learned_data()

    def run(self):
        for i in range(self.episode_num):
            self.log_start_episode(i + 1)

            # initialize the world with a random state
            self.wm.reset_with_random_state()

            while True:
                if self.is_gui_enable:
                    self.gui.draw()

                # get current world state
                currentState = self.wm.get_current_state().get_discrete_state()

                # select a random action
                valid_actions = self.get_valid_actions()
                randomAction = random.choice(valid_actions)

                # compute the next world state without applying it
                nextState = self.wm.compute_next_state(
                    randomAction).get_discrete_state()

                # calculate the reward based on the current state
                reward = self.calculate_reward()

                # take the max of Q over the next state and all valid actions
                nextValidActions = self.get_valid_actions(randomAction)
                maxQ = max(
                    self.Q.get(nextState, {}).get(action, config.DEFAULT_Q)
                    for action in nextValidActions)

                # calculate q for the current state
                if currentState not in self.Q:
                    self.Q[currentState] = {}

                q = self.Q[currentState].get(randomAction, config.DEFAULT_Q)
                self.Q[currentState][randomAction] = q + config.Q_ALPHA * (
                    reward + config.Q_GAMMA * maxQ - q)

                self.log_saving_new_q_value(i + 1, currentState, randomAction,
                                            reward,
                                            self.Q[currentState][randomAction],
                                            nextState)

                # update the world with the next state
                self.wm.update(randomAction)

                if self.is_episode_finished():
                    break

            self.log_ending_episode(i + 1)
            self.save_learned_data()

    def calculate_reward(self):
        # larger reward for small angles, with a bonus for staying away
        # from the walls
        state = self.wm.get_current_state()
        positiveAngle = self.get_positive_angle(state.angle)
        minDistanceFromWall = min(state.pos, config.SPACE_WIDTH - state.pos)

        angleReward = ((1.5 if positiveAngle < 45.0 else 1)
                       * (180 - positiveAngle))
        return angleReward + 2 * minDistanceFromWall

    def is_episode_finished(self):
        state = self.wm.get_current_state()
        positiveAngle = self.get_positive_angle(state.angle)
        if positiveAngle < 5:
            self.num_of_success_repeat += 1
            if self.num_of_success_repeat >= config.SUCCESS_REPEATS:
                logging.debug("Suscces")
                logging.debug("p: " + str(positiveAngle) + " w:" +
                              str(abs(state.w)) + " vel: " + str(state.vel) +
                              "np: " + str(self.num_of_success_repeat))
                return True
        else:
            self.num_of_success_repeat = 0

        if 170 < positiveAngle < 180:
            self.num_of_pointless_tries += 1
            if self.num_of_pointless_tries >= config.POINTLESS_REPEATS:
                logging.debug("pointless")
                logging.debug("p: " + str(positiveAngle) + " w:" +
                              str(abs(state.w)) + " vel: " + str(state.vel) +
                              "np: " + str(self.num_of_pointless_tries))
                return True
        else:
            self.num_of_pointless_tries = 0

        minDistanceFromWall = min(state.pos, config.SPACE_WIDTH - state.pos)
        if minDistanceFromWall < 0.01:
            self.num_of_is_near_wall += 1
            if self.num_of_is_near_wall >= config.NEAR_WALL_REPEATS:
                logging.debug("fails")
                logging.debug("p: " + str(positiveAngle) + " w:" +
                              str(abs(state.w)) + " vel: " + str(state.vel) +
                              "minDis: " + str(minDistanceFromWall) + "np: " +
                              str(self.num_of_is_near_wall))
                return True
        else:
            self.num_of_is_near_wall = 0

        return False

    def get_valid_actions(self, action=None):
        # evaluate either the current state or the state the action leads to
        currentState = (self.wm.get_current_state() if action is None
                        else self.wm.compute_next_state(action))
        if currentState.pos < 0.5:
            return [ActionType.ACT_NONE, ActionType.ACT_RIGHT]
        elif config.SPACE_WIDTH - currentState.pos < 0.5:
            return [ActionType.ACT_NONE, ActionType.ACT_LEFT]

        return [ActionType.ACT_NONE, ActionType.ACT_RIGHT, ActionType.ACT_LEFT]

    def get_positive_angle(self, angle):
        # fold any angle into [0, 180], e.g. -350 -> 10 and 190 -> 170
        angle = abs(angle) % 360
        return angle if angle <= 180 else 360 - angle

    def show(self):
        # Q maps a discrete state to a dict of action -> value (see run())
        for state, actions in self.Q.items():
            for action, q in actions.items():
                print("(" + str(state.angle * config.DEGREE_STEP) + ", " +
                      str(state.pos) + ") action: " + str(action) +
                      " --- Q: " + str(q))

    def log_start_episode(self, episodeNum):
        print("\nStart learning new episode (" + str(episodeNum) + "/" +
              str(self.episode_num) + ")")

    def log_saving_new_q_value(self, episodeNum, currentState, action, reward,
                               q, newState):
        print("Episode: " + str(episodeNum) + "/" + str(self.episode_num))
        print("current state:", currentState)
        print("did action:", action)
        print("entered state:", newState)
        print("rewarded:", reward)
        print("updating Q for [ (" + str(currentState.angle) + "," +
              str(currentState.pos) + "), " + str(action) + " ] =", q)
        # 120 * 11 appears to be the size of the discrete state space
        # (angle bins x position bins)
        print("exploration percent:", str(len(self.Q) / (120 * 11.0) * 100),
              "( " + str(len(self.Q)) + "/" + str(120 * 11) + ")")
        print('\n')

    def log_ending_episode(self, episodeNum):
        print("Ending episode: " + str(episodeNum))
        print("--------------------------------------------")

    def save_learned_data(self):
        with open(config.LEARNED_DATA["path"], 'wb') as f:
            pickle.dump(self.Q, f, pickle.HIGHEST_PROTOCOL)

    def load_learned_data(self):
        if not os.path.exists(config.LEARNED_DATA["path"]):
            if not os.path.exists(config.LEARNED_DATA["dir"]):
                os.mkdir(config.LEARNED_DATA["dir"])

            open(config.LEARNED_DATA["path"], 'a').close()
        else:
            if os.stat(config.LEARNED_DATA["path"]).st_size != 0:
                with open(config.LEARNED_DATA["path"], 'rb') as f:
                    self.Q = pickle.load(f)
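The inner loop above is the standard tabular Q-learning update, Q(s, a) <- Q(s, a) + alpha * (reward + gamma * max_a' Q(s', a') - Q(s, a)). A self-contained sketch of that update, with alpha, gamma and default_q standing in for config.Q_ALPHA, config.Q_GAMMA and config.DEFAULT_Q:

def q_update(Q, state, action, reward, next_state, next_actions,
             alpha=0.1, gamma=0.9, default_q=0.0):
    # best value reachable from the next state
    max_q = max(Q.get(next_state, {}).get(a, default_q) for a in next_actions)
    # move Q(state, action) toward the bootstrapped target
    q = Q.setdefault(state, {}).get(action, default_q)
    Q[state][action] = q + alpha * (reward + gamma * max_q - q)
    return Q[state][action]

Q = {}
q_update(Q, 's0', 'right', 10.0, 's1', ['left', 'right'])
print(Q)  # {'s0': {'right': 1.0}}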
Example 4
from random import choice
from time import sleep

# config, Connection, Parser and WorldModel are assumed to be imported from
# this project's own modules.


class Manager:
    def __init__(self):
        self.wm = WorldModel()
        self.conn = Connection()


    def init(self):
        self.conn.start_server(port=config.port)
        # wait until both players have connected
        while len(self.conn.clients) < 2:
            sleep(1)

        # handshake: read each team's name, then reply with its color
        # (b'1' for white, b'0' for black)
        white_team_name = self.conn.recv(0, 32)
        self.conn.send(0, b'1')

        black_team_name = self.conn.recv(1, 32)
        self.conn.send(1, b'0')

        # tell each team who it is playing against
        self.conn.send(0, black_team_name)
        self.conn.send(1, white_team_name)

        self.conn.set_all_timeouts(5)

        self.wm.init(white_team_name.decode(), black_team_name.decode())


    def run(self):
        sleep(3)

        turn = 1
        while True:
            is_white = bool(turn % 2)  # odd turns belong to white
            moved = False
            final_move = None

            try:
                # expect a 3-byte encoded move from the player on turn
                data_bytes = self.conn.recv(0 if is_white else 1, 3)
                if data_bytes:
                    client_turn, move = Parser.decode(data_bytes)
                    if client_turn == turn and self.wm.check_move(move, is_white):
                        moved = True
                        final_move = move
            except Exception as err:
                print(err)

            # fall back to a random legal move if the client failed to answer
            if not moved:
                print('random move')
                moves = self.wm.all_moves(is_white)
                if moves:
                    final_move = choice(moves)

            self.wm.do_move(final_move, is_white)
            self.conn.send2all(Parser.encode(turn, final_move))

            print(self.wm)

            w, b = self.wm.result()
            if w + b == 64:  # every square filled: the game is over
                if w > b:
                    print('White wins!')
                elif w < b:
                    print('Black wins!')
                else:
                    print('Draw!')
                break

            turn += 1
            sleep(1)

        sleep(6)
        self.conn.disconnect()
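A hypothetical entry point for the server side; only Manager and its methods come from the example above:

if __name__ == '__main__':
    manager = Manager()
    manager.init()  # wait for two clients and exchange team names
    manager.run()   # alternate turns until all 64 squares are filled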