# Assumes the standard `random` module and the GameLogic class are provided by the
# surrounding script; store.object is the base class supplied by the game framework.
class IntelligentAgent2(store.object):
    # initialize all the member variables
    def __init__(self):
        # AI contains the Game Logic
        self.ai_logic = GameLogic()
        # discount factor in the Q-learning algorithm
        self.discount = 0.4
        # k value for exploration
        self.k = 2

        # Set up for Parent
        self.p_change = 0
        self.t_count = 0
        self.u_count = 0
        self.v_count = 0
        self.w_count = 0
        # setup and initialize ordinal matrix
        self.p_ord_matrix = []
        self.ord_t = 0
        self.ord_u = 0
        self.ord_v = 0
        self.ord_w = 0
        self.update_p_ord_matrix()
        self.save_p_ord_matrix()
        # setup and initialize Q-value matrix
        self.q_t = self.ai_logic.t[1]
        self.q_u = self.ai_logic.u[1]
        self.q_v = self.ai_logic.v[1]
        self.q_w = self.ai_logic.w[1]
        self.p_q_matrix = []
        self.update_p_q_matrix()
        # setup and initialize probability matrix
        self.p_probability_matrix = []
        self.update_p_probability_matrix()

        # Set up for Child
        self.c_change = 0
        self.o_count = 0
        self.p_count = 0
        self.q_count = 0
        self.y_count = 0
        # setup and initialize ordinal matrix
        self.c_ord_matrix = []
        self.ord_o = 0
        self.ord_p = 0
        self.ord_q = 0
        self.ord_y = 0
        self.update_c_ord_matrix()
        self.save_c_ord_matrix()
        # setup and initialize Q-value matrix
        self.q_o = self.ai_logic.t[0]
        self.q_p = self.ai_logic.u[0]
        self.q_q = self.ai_logic.v[0]
        self.q_y = self.ai_logic.w[0]
        self.c_q_matrix = []
        self.update_c_q_matrix()
        # setup and initialize probability matrix
        self.c_probability_matrix = []
        self.update_c_probability_matrix()

    def reset_p_learning_rate(self):
        self.t_count = 0
        self.u_count = 0
        self.v_count = 0
        self.w_count = 0

    def reset_c_learning_rate(self):
        self.o_count = 0
        self.p_count = 0
        self.q_count = 0
        self.y_count = 0

    # calculate the new ordinal matrices for the new state and store them
    def update_ord_matrix(self):
        self.update_p_ord_matrix()
        self.update_c_ord_matrix()

    def update_p_ord_matrix(self):
        self.ai_logic.update_matrix()
        self.p_ord_matrix = []
        temp_matrix = [self.ai_logic.t[1], self.ai_logic.u[1],
                       self.ai_logic.v[1], self.ai_logic.w[1]]
        # for each action, rank the reward in terms of value,
        # giving a matrix representing 41 states
        for i in range(4):
            count = 1
            for j in range(4):
                if i == j:
                    continue
                if temp_matrix[i] > temp_matrix[j]:
                    count += 1
            self.p_ord_matrix.append(count)

    def update_c_ord_matrix(self):
        self.ai_logic.update_matrix()
        self.c_ord_matrix = []
        temp_matrix = [self.ai_logic.t[0], self.ai_logic.u[0],
                       self.ai_logic.v[0], self.ai_logic.w[0]]
        for i in range(4):
            count = 1
            for j in range(4):
                if i == j:
                    continue
                if temp_matrix[i] > temp_matrix[j]:
                    count += 1
            self.c_ord_matrix.append(count)

    # store ordinal values in the class to remember
    def save_p_ord_matrix(self):
        self.ord_t = self.p_ord_matrix[0]
        self.ord_u = self.p_ord_matrix[1]
        self.ord_v = self.p_ord_matrix[2]
        self.ord_w = self.p_ord_matrix[3]

    def save_c_ord_matrix(self):
        self.ord_o = self.c_ord_matrix[0]
        self.ord_p = self.c_ord_matrix[1]
        self.ord_q = self.c_ord_matrix[2]
        self.ord_y = self.c_ord_matrix[3]

    # checks whether the newly calculated ordinal matrices differ from the old ones,
    # representing a change in state
    def ord_matrix_change(self):
        parent = self.p_ord_matrix_change()
        child = self.c_ord_matrix_change()
        return parent or child

    def p_ord_matrix_change(self):
        if (self.ord_t != self.p_ord_matrix[0] or self.ord_u != self.p_ord_matrix[1]
                or self.ord_v != self.p_ord_matrix[2] or self.ord_w != self.p_ord_matrix[3]):
            self.save_p_ord_matrix()
            self.p_change += 1
            self.reset_p_learning_rate()
            return True
        return False

    def c_ord_matrix_change(self):
        if (self.ord_o != self.c_ord_matrix[0] or self.ord_p != self.c_ord_matrix[1]
                or self.ord_q != self.c_ord_matrix[2] or self.ord_y != self.c_ord_matrix[3]):
            self.save_c_ord_matrix()
            self.c_change += 1
            self.reset_c_learning_rate()
            return True
        return False

    # updates the Q matrices with the new Q values after each state change
    def update_q_matrix(self):
        self.update_p_q_matrix()
        self.update_c_q_matrix()

    def update_p_q_matrix(self):
        self.p_q_matrix = []
        self.p_q_matrix.append(self.q_t)
        self.p_q_matrix.append(self.q_u)
        self.p_q_matrix.append(self.q_v)
        self.p_q_matrix.append(self.q_w)

    def update_c_q_matrix(self):
        self.c_q_matrix = []
        self.c_q_matrix.append(self.q_o)
        self.c_q_matrix.append(self.q_p)
        self.c_q_matrix.append(self.q_q)
        self.c_q_matrix.append(self.q_y)

    # updates the Q value for action t
    def update_q_t(self):
        if self.t_count == 0:
            learning_rate = 1
        else:
            learning_rate = 1.0 / self.t_count
        self.q_t = self.q_t + learning_rate * (
            self.ai_logic.t[1] + (self.discount * max(self.p_q_matrix)) - self.q_t)

    # updates the Q value for action u
    def update_q_u(self):
        if self.u_count == 0:
            learning_rate = 1
        else:
            learning_rate = 1.0 / self.u_count
        self.q_u = self.q_u + learning_rate * (
            self.ai_logic.u[1] + (self.discount * max(self.p_q_matrix)) - self.q_u)

    # updates the Q value for action v
    def update_q_v(self):
        if self.v_count == 0:
            learning_rate = 1
        else:
            learning_rate = 1.0 / self.v_count
        self.q_v = self.q_v + learning_rate * (
            self.ai_logic.v[1] + (self.discount * max(self.p_q_matrix)) - self.q_v)

    # updates the Q value for action w
    def update_q_w(self):
        if self.w_count == 0:
            learning_rate = 1
        else:
            learning_rate = 1.0 / self.w_count
        self.q_w = self.q_w + learning_rate * (
            self.ai_logic.w[1] + (self.discount * max(self.p_q_matrix)) - self.q_w)

    # updates the Q value for action o
    def update_q_o(self):
        if self.o_count == 0:
            learning_rate = 1
        else:
            learning_rate = 1.0 / self.o_count
        self.q_o = self.q_o + learning_rate * (
            self.ai_logic.t[0] + (self.discount * max(self.c_q_matrix)) - self.q_o)

    # updates the Q value for action p
    def update_q_p(self):
        if self.p_count == 0:
            learning_rate = 1
        else:
            learning_rate = 1.0 / self.p_count
        self.q_p = self.q_p + learning_rate * (
            self.ai_logic.u[0] + (self.discount * max(self.c_q_matrix)) - self.q_p)

    # updates the Q value for action q
    def update_q_q(self):
        if self.q_count == 0:
            learning_rate = 1
        else:
            learning_rate = 1.0 / self.q_count
        self.q_q = self.q_q + learning_rate * (
            self.ai_logic.v[0] + (self.discount * max(self.c_q_matrix)) - self.q_q)

    # updates the Q value for action y
    def update_q_y(self):
        if self.y_count == 0:
            learning_rate = 1
        else:
            learning_rate = 1.0 / self.y_count
        self.q_y = self.q_y + learning_rate * (
            self.ai_logic.w[0] + (self.discount * max(self.c_q_matrix)) - self.q_y)

    # updates the probability matrices after a change in Q values
    def update_probability_matrix(self):
        self.update_p_probability_matrix()
        self.update_c_probability_matrix()

    def update_p_probability_matrix(self):
        self.p_probability_matrix = []
        self.update_p_q_matrix()
        total = 0
        for i in range(4):
            total += self.k ** self.p_q_matrix[i]
        self.p_probability_matrix.append((self.k ** self.q_t) / total)
        self.p_probability_matrix.append((self.k ** self.q_u) / total)
        self.p_probability_matrix.append((self.k ** self.q_v) / total)
        self.p_probability_matrix.append((self.k ** self.q_w) / total)

    def update_c_probability_matrix(self):
        self.c_probability_matrix = []
        self.update_c_q_matrix()
        total = 0
        for i in range(4):
            total += self.k ** self.c_q_matrix[i]
        self.c_probability_matrix.append((self.k ** self.q_o) / total)
        self.c_probability_matrix.append((self.k ** self.q_p) / total)
        self.c_probability_matrix.append((self.k ** self.q_q) / total)
        self.c_probability_matrix.append((self.k ** self.q_y) / total)

    # checks if the AI won the game given an action pair
    def check_win(self, adult, child):
        return self.ai_logic.check_win(adult, child)

    # returns the number of rounds the AI has played
    def get_round(self):
        return self.ai_logic.round

    def parent_move(self):
        # random number from 0 to 1
        choice = random.random()
        # determine the probabilistic range of each action
        prob_t = self.p_probability_matrix[0]
        prob_u = self.p_probability_matrix[0] + self.p_probability_matrix[1]
        prob_v = (self.p_probability_matrix[0] + self.p_probability_matrix[1]
                  + self.p_probability_matrix[2])
        prob_w = 1
        # if t is chosen, adult attends
        if choice < prob_t:
            adult = "attend"
        # if u is chosen, adult ignores
        if prob_t <= choice and choice < prob_u:
            adult = "ignore"
        # if v is chosen, adult attends
        if prob_u <= choice and choice < prob_v:
            adult = "attend"
        # if w is chosen, adult ignores
        if prob_v <= choice and choice < prob_w:
            adult = "ignore"
        return adult

    def child_move(self):
        # random number from 0 to 1
        choice = random.random()
        # determine the probabilistic range of each action
        prob_o = self.c_probability_matrix[0]
        prob_p = self.c_probability_matrix[0] + self.c_probability_matrix[1]
        prob_q = (self.c_probability_matrix[0] + self.c_probability_matrix[1]
                  + self.c_probability_matrix[2])
        prob_y = 1
        # if o is chosen, child goes
        if choice < prob_o:
            child = "go"
        # if p is chosen, child goes
        if prob_o <= choice and choice < prob_p:
            child = "go"
        # if q is chosen, child does not go
        if prob_p <= choice and choice < prob_q:
            child = "dontgo"
        # if y is chosen, child does not go
        if prob_q <= choice and choice < prob_y:
            child = "dontgo"
        return child

    def move(self):
        adult = self.parent_move()
        child = self.child_move()
        # find which action was chosen and update the specific Q value and probability matrix
        if adult == "attend" and child == "go":
            self.ai_logic.attend_go()
            self.ai_logic.update_matrix()
            self.update_ord_matrix()
            # checks if the action caused a change in the ordinal matrix
            self.ord_matrix_change()
            if not self.check_final_action("adult"):
                self.t_count += 1
                self.update_q_t()
            if not self.check_final_action("child"):
                self.o_count += 1
                self.update_q_o()
            self.update_q_matrix()
            self.update_probability_matrix()
        if adult == "attend" and child == "dontgo":
            self.ai_logic.attend_dontgo()
            self.ai_logic.update_matrix()
            self.update_ord_matrix()
            self.ord_matrix_change()
            if not self.check_final_action("adult"):
                self.v_count += 1
                self.update_q_v()
            if not self.check_final_action("child"):
                self.q_count += 1
                self.update_q_q()
            self.update_q_matrix()
            self.update_probability_matrix()
        if adult == "ignore" and child == "go":
            self.ai_logic.ignore_go()
            self.ai_logic.update_matrix()
            self.update_ord_matrix()
            self.ord_matrix_change()
            if not self.check_final_action("adult"):
                self.u_count += 1
                self.update_q_u()
            if not self.check_final_action("child"):
                self.p_count += 1
                self.update_q_p()
            self.update_q_matrix()
            self.update_probability_matrix()
        if adult == "ignore" and child == "dontgo":
            self.ai_logic.ignore_dontgo()
            self.ai_logic.update_matrix()
            self.update_ord_matrix()
            self.ord_matrix_change()
            if not self.check_final_action("adult"):
                self.w_count += 1
                self.update_q_w()
            if not self.check_final_action("child"):
                self.y_count += 1
                self.update_q_y()
            self.update_q_matrix()
            self.update_probability_matrix()
        return [adult, child]

    def check_final_action(self, player):
        if player == "adult":
            for i in range(4):
                if round(self.p_probability_matrix[i], 2) == 1.00:
                    return True
            return False
        elif player == "child":
            for i in range(4):
                if round(self.c_probability_matrix[i], 2) == 1.00:
                    return True
            return False
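# Both agent classes (IntelligentAgent2 above and IntelligentAgent below) turn their Q-values
# into move probabilities with the same exponential weighting, P(a) = k**Q(a) / sum_b k**Q(b),
# and then sample a move by walking the cumulative probability ranges. The standalone sketch
# below only illustrates that rule; the helper names `action_probabilities` and `sample_action`
# are not part of the original classes, and the k = 2 base matches the agents' default.
import random

def action_probabilities(q_values, k=2):
    # exponential weighting of each Q-value, normalised so the probabilities sum to 1
    weights = [k ** q for q in q_values]
    total = sum(weights)
    return [w / total for w in weights]

def sample_action(actions, q_values, k=2):
    # draw a uniform number and walk the cumulative ranges, mirroring how
    # parent_move()/child_move() pick "attend"/"ignore" or "go"/"dontgo"
    probs = action_probabilities(q_values, k)
    choice = random.random()
    cumulative = 0.0
    for action, prob in zip(actions, probs):
        cumulative += prob
        if choice < cumulative:
            return action
    return actions[-1]

# example: higher Q-values make the corresponding action proportionally more likely
# action_probabilities([2.0, 1.0, 0.5, 0.0])  ->  roughly [0.48, 0.24, 0.17, 0.12]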
class IntelligentAgent(store.object):
    # initialize all the member variables
    def __init__(self):
        # AI contains the Game Logic
        self.ai_logic = GameLogic()
        # discount factor in the Q-learning algorithm
        self.discount = 0.4
        # k value for exploration
        self.k = 2
        self.adult_last_move = "first"
        self.change = 0
        self.t_count = 0
        self.u_count = 0
        self.v_count = 0
        self.w_count = 0
        # setup and initialize ordinal matrix
        self.ord_matrix = []
        self.ord_t = 0
        self.ord_u = 0
        self.ord_v = 0
        self.ord_w = 0
        self.update_ord_matrix()
        self.save_ord_matrix()
        # setup and initialize Q-value matrix
        self.q_t = self.ai_logic.t[1]
        self.q_u = self.ai_logic.u[1]
        self.q_v = self.ai_logic.v[1]
        self.q_w = self.ai_logic.w[1]
        self.q_matrix = []
        self.update_q_matrix()
        # setup and initialize probability matrix
        self.probability_matrix = []
        self.update_probability_matrix()

    def reset_learning_rate(self):
        self.t_count = 0
        self.u_count = 0
        self.v_count = 0
        self.w_count = 0

    # calculate the new ordinal matrix for the new state and store it
    def update_ord_matrix(self):
        self.ai_logic.update_matrix()
        self.ord_matrix = []
        temp_matrix = [self.ai_logic.t[1], self.ai_logic.u[1],
                       self.ai_logic.v[1], self.ai_logic.w[1]]
        # for each action, rank the reward in terms of value,
        # giving a matrix representing 41 states
        for i in range(4):
            count = 1
            for j in range(4):
                if i == j:
                    continue
                if temp_matrix[i] > temp_matrix[j]:
                    count += 1
            self.ord_matrix.append(count)

    # store ordinal values in the class to remember
    def save_ord_matrix(self):
        self.ord_t = self.ord_matrix[0]
        self.ord_u = self.ord_matrix[1]
        self.ord_v = self.ord_matrix[2]
        self.ord_w = self.ord_matrix[3]

    # checks whether the newly calculated ordinal matrix differs from the old one,
    # representing a change in state
    def ord_matrix_change(self):
        if (self.ord_t != self.ord_matrix[0] or self.ord_u != self.ord_matrix[1]
                or self.ord_v != self.ord_matrix[2] or self.ord_w != self.ord_matrix[3]):
            self.save_ord_matrix()
            return True
        return False

    # updates the Q matrix with the new Q values after each state change
    def update_q_matrix(self):
        self.q_matrix = []
        self.q_matrix.append(self.q_t)
        self.q_matrix.append(self.q_u)
        self.q_matrix.append(self.q_v)
        self.q_matrix.append(self.q_w)

    # updates the Q value for action t
    def update_q_t(self):
        if self.t_count == 0:
            learning_rate = 1
        else:
            learning_rate = 1.0 / self.t_count
        self.q_t = self.q_t + learning_rate * (
            self.ai_logic.t[1] + (self.discount * max(self.q_matrix)) - self.q_t)

    # updates the Q value for action u
    def update_q_u(self):
        if self.u_count == 0:
            learning_rate = 1
        else:
            learning_rate = 1.0 / self.u_count
        self.q_u = self.q_u + learning_rate * (
            self.ai_logic.u[1] + (self.discount * max(self.q_matrix)) - self.q_u)

    # updates the Q value for action v
    def update_q_v(self):
        if self.v_count == 0:
            learning_rate = 1
        else:
            learning_rate = 1.0 / self.v_count
        self.q_v = self.q_v + learning_rate * (
            self.ai_logic.v[1] + (self.discount * max(self.q_matrix)) - self.q_v)

    # updates the Q value for action w
    def update_q_w(self):
        if self.w_count == 0:
            learning_rate = 1
        else:
            learning_rate = 1.0 / self.w_count
        self.q_w = self.q_w + learning_rate * (
            self.ai_logic.w[1] + (self.discount * max(self.q_matrix)) - self.q_w)

    # updates the probability matrix after a change in Q values
    def update_probability_matrix(self):
        self.probability_matrix = []
        self.update_q_matrix()
        total = 0
        for i in range(4):
            total += self.k ** self.q_matrix[i]
        self.probability_matrix.append((self.k ** self.q_t) / total)
        self.probability_matrix.append((self.k ** self.q_u) / total)
        self.probability_matrix.append((self.k ** self.q_v) / total)
        self.probability_matrix.append((self.k ** self.q_w) / total)

    # called to ask the AI to make a move; returns the action in the form [adult, child]
    def move(self):
        # random number from 0 to 1
        choice = random.random()
        # determine the probabilistic range of each action
        prob_t = self.probability_matrix[0]
        prob_u = self.probability_matrix[0] + self.probability_matrix[1]
        prob_v = (self.probability_matrix[0] + self.probability_matrix[1]
                  + self.probability_matrix[2])
        prob_w = 1
        # if t is chosen, adult attends
        if choice < prob_t:
            adult = "attend"
        # if u is chosen, adult ignores
        if prob_t <= choice and choice < prob_u:
            adult = "ignore"
        # if v is chosen, adult attends
        if prob_u <= choice and choice < prob_v:
            adult = "attend"
        # if w is chosen, adult ignores
        if prob_v <= choice and choice < prob_w:
            adult = "ignore"
        # find the action choice of the inner child given the adult's action choice
        child = self.child_move(adult)
        # find which action was chosen and update the specific Q value and probability matrix
        if adult == "attend" and child == "go":
            self.ai_logic.attend_go()
            self.ai_logic.update_matrix()
            self.update_ord_matrix()
            # checks if the action caused a change in the ordinal matrix
            if self.ord_matrix_change():
                self.change += 1
                self.reset_learning_rate()
            self.t_count += 1
            self.update_q_t()
            self.update_q_matrix()
            self.update_probability_matrix()
        if adult == "attend" and child == "dontgo":
            self.ai_logic.attend_dontgo()
            self.ai_logic.update_matrix()
            self.update_ord_matrix()
            if self.ord_matrix_change():
                self.change += 1
                self.reset_learning_rate()
            self.v_count += 1
            self.update_q_v()
            self.update_q_matrix()
            self.update_probability_matrix()
        if adult == "ignore" and child == "go":
            self.ai_logic.ignore_go()
            self.ai_logic.update_matrix()
            self.update_ord_matrix()
            if self.ord_matrix_change():
                self.change += 1
                self.reset_learning_rate()
            self.u_count += 1
            self.update_q_u()
            self.update_q_matrix()
            self.update_probability_matrix()
        if adult == "ignore" and child == "dontgo":
            self.ai_logic.ignore_dontgo()
            self.ai_logic.update_matrix()
            self.update_ord_matrix()
            if self.ord_matrix_change():
                self.change += 1
                self.reset_learning_rate()
            self.w_count += 1
            self.update_q_w()
            self.update_q_matrix()
            self.update_probability_matrix()
        return [adult, child]

    # prompts the child to choose an action given the adult's choice:
    # pick the action with the greater reward; if the rewards are equal,
    # choose at random between go and don't go
    def child_move(self, adult):
        if self.adult_last_move == "first":
            self.adult_last_move = adult
            child_action = random.random()
            if child_action <= 0.5:
                return "go"
            else:
                return "dontgo"
        if self.adult_last_move == "attend":
            self.adult_last_move = adult
            if self.ai_logic.t[0] > self.ai_logic.v[0]:
                return "go"
            elif self.ai_logic.t[0] < self.ai_logic.v[0]:
                return "dontgo"
            elif self.ai_logic.t[0] == self.ai_logic.v[0]:
                child_action = random.random()
                if child_action < 0.5:
                    return "go"
                else:
                    return "dontgo"
        if self.adult_last_move == "ignore":
            self.adult_last_move = adult
            if self.ai_logic.u[0] > self.ai_logic.w[0]:
                return "go"
            elif self.ai_logic.u[0] < self.ai_logic.w[0]:
                return "dontgo"
            elif self.ai_logic.u[0] == self.ai_logic.w[0]:
                child_action = random.random()
                if child_action < 0.5:
                    return "go"
                else:
                    return "dontgo"

    # checks if the AI won the game given an action pair
    def check_win(self, adult, child):
        return self.ai_logic.check_win(adult, child)

    # returns the number of rounds the AI has played
    def get_round(self):
        return self.ai_logic.round
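# A minimal driver sketch, assuming GameLogic and the two agent classes above are defined in
# the same script; the helper name `simulate_rounds` and the round limit are illustrative and
# not part of the original game code.
def simulate_rounds(agent, rounds=50):
    # repeatedly ask the agent for a joint [adult, child] move and stop if it reports a win
    for _ in range(rounds):
        adult, child = agent.move()
        if agent.check_win(adult, child):
            return ("win", agent.get_round())
    return ("no win", agent.get_round())

# for example:
#     result = simulate_rounds(IntelligentAgent2())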