# Imports used across the listings below
import copy
import time
from threading import Thread, Lock

import numpy as np


def run(self):
    """Client's body."""
    try:
        self.connect()
        self.send_string(self.player_name)
        state = State(self.read_string())
        self.state_hash_tables_tmp[0][state.get_hash()] = {"value": 0, "used": 1}
        while True:  # Playing
            if self.color == state.turn:  # Check turn
                # Retrieving best action and its value
                action, value = choose_action(state, self.game,
                                              self.state_hash_tables_tmp)
                self.send_string(action_to_server_format(action))
                print("Chosen action:", action_to_server_format(action))
                print("Chosen action value:", value)
            else:
                clear_hash_table(self.state_hash_tables_tmp, state)
            # The server sends the updated board after every move
            state = State(self.read_string())
            update_used(self.state_hash_tables_tmp, state, self.game.weights,
                        self.game.color)
    except Exception as e:
        print(e)
    finally:
        print("Game ended.")
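# NOTE: clear_hash_table and update_used are called above but not defined in
# this listing. Below is a minimal sketch of what they might look like,
# inferred from how run() and min_value() use the tables: a list of dicts
# indexed by the number of captured checkers, whose entries hold "value",
# "used" and optionally "all_actions". Treat both bodies as illustrative
# assumptions, not the project's actual implementation.

def clear_hash_table(state_hash_tables, state):
    """Drop tables for capture counts that can no longer occur.

    Pieces only ever leave the board, so any table indexed by fewer
    captures than the current state is unreachable from here on.
    """
    index = MAX_NUM_CHECKERS - cont_pieces(state)
    for i in range(index):
        state_hash_tables[i].clear()


def update_used(state_hash_tables, state, weights, color):
    """Mark the state just received from the server as part of the game line.

    min_value() scores entries with "used" == 1 as a draw, so setting the
    flag here is what implements the repetition penalty.
    """
    index = MAX_NUM_CHECKERS - cont_pieces(state)
    state_hash = state.get_hash()
    entry = state_hash_tables[index].get(state_hash)
    if entry is not None:
        entry["used"] = 1
    else:
        value = state.compute_heuristic(weights, color)
        state_hash_tables[index][state_hash] = {"value": value, "used": 1}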
def search_thread(state, action, game, best_score, beta, depth, max_depth,
                  time_start, state_hash_table, stop, active, id_m,
                  best_scores):
    """Worker loop: repeatedly search whatever action the main thread assigns
    to this slot, until the shared stop flag is raised."""
    lock_time.acquire()
    tmp_time = stop[0]
    lock_time.release()
    while not tmp_time:
        # Mark this worker as idle and wait for a new assignment
        lock_bool.acquire()
        active[id_m] = False
        tmp = active[id_m]
        lock_bool.release()
        while not tmp and not tmp_time:
            lock_bool.acquire()
            tmp = active[id_m]
            lock_bool.release()
            lock_time.acquire()
            tmp_time = stop[0]
            lock_time.release()
        if tmp_time:
            break
        # Snapshot the current bound and the assigned action
        lock_value.acquire()
        tmp_best = best_score[0]
        a = action[id_m]
        lock_value.release()
        v = min_value(
            State(second_init_args=(state, a[0], a[1], a[2], a[3], a[4])),
            game, tmp_best, beta, depth + 1, max_depth, time_start,
            state_hash_table)
        if v > best_scores[id_m]:
            best_scores[id_m] = v
        lock_time.acquire()
        tmp_time = stop[0]
        lock_time.release()
    return
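# NOTE: the threaded variants in this listing rely on module-level locks and
# a pool-size constant that are not shown. A minimal sketch of those globals,
# inferred from usage (the value of N_THREAD is an illustrative assumption):

lock_time = Lock()   # guards the shared stop/timeout flag
lock_bool = Lock()   # guards the per-worker "active" flags
lock_value = Lock()  # guards best_score and the shared action slots
lock_hash = Lock()   # guards the per-capture-count hash tables
lock_2 = Lock()      # guards hash-table access inside the threaded max_value

N_THREAD = 4         # size of the worker pool (illustrative value)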
def min_value(state, game, alpha, beta, depth, max_depth, time_start,
              state_hash_table):
    tmp_victory = state.check_victory()
    if tmp_victory == -1 and game.color == "BLACK":
        # King captured and black player -> Win
        return MAX_VAL_HEURISTIC
    elif tmp_victory == -1 and game.color == "WHITE":
        # King captured and white player -> Lose
        return -MAX_VAL_HEURISTIC
    elif tmp_victory == 1 and game.color == "BLACK":
        # King escaped and black player -> Lose
        return -MAX_VAL_HEURISTIC
    elif tmp_victory == 1 and game.color == "WHITE":
        # King escaped and white player -> Win
        return MAX_VAL_HEURISTIC
    state_hash = state.get_hash()
    index_checkers = MAX_NUM_CHECKERS - cont_pieces(state)
    lock_hash.acquire()
    hash_result = copy.deepcopy(
        state_hash_table[index_checkers].get(state_hash))
    lock_hash.release()
    all_actions = None
    if hash_result is not None:
        if hash_result['used'] == 1:
            return -DRAW_POINTS
        if hash_result.get('all_actions') is not None:
            all_actions = hash_result.get('all_actions')
    if cutoff_test(depth, max_depth, game.max_time, time_start):
        # If reached maximum depth or total time
        if hash_result is not None and hash_result.get("value") is not None:
            # If state previously evaluated don't recompute heuristic
            return hash_result["value"]
        # If state not previously evaluated
        value = state.compute_heuristic(game.weights, game.color)
        # Add state and value to hash table
        add_to_hash(state_hash_table, state_hash, value, None, index_checkers)
        return value
    # Body
    v = np.inf
    if all_actions is None:
        all_actions = game.produce_actions(state)
        if hash_result is not None:
            add_to_hash(state_hash_table, state_hash, hash_result['value'],
                        all_actions, index_checkers)
    if len(all_actions) == 0:
        return MAX_VAL_HEURISTIC
    for a in all_actions:
        v = min(v,
                max_value(
                    State(second_init_args=(state, a[0], a[1], a[2], a[3], a[4])),
                    game, alpha, beta, depth + 1, max_depth, time_start,
                    state_hash_table))
        if v <= alpha:
            return v
        beta = min(beta, v)
    return v
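# NOTE: cutoff_test and add_to_hash are referenced but not defined in this
# listing. A minimal sketch consistent with how they are called (signatures
# inferred from the call sites; the bodies and the 0.1 s safety margin are
# assumptions). add_to_hash is called with five arguments here and with four
# in the threaded max_value below, hence the optional index_checkers:

def cutoff_test(depth, max_depth, max_time, time_start):
    """Stop descending when the depth limit is hit or time is almost up."""
    return depth >= max_depth or max_time - (time.time() - time_start) <= 0.1


def add_to_hash(state_hash_table, state_hash, value, all_actions,
                index_checkers=None):
    """Store (or refresh) a transposition entry for the given state."""
    entry = {"value": value, "used": 0, "all_actions": all_actions}
    if index_checkers is None:
        state_hash_table[state_hash] = entry           # flat-table variant
    else:
        state_hash_table[index_checkers][state_hash] = entry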
def min_value(state, game, alpha, beta, depth, max_depth, htable, time_start):
    if game.max_time - (time.time() - time_start) <= 0.1:
        return False  # Time almost up: abort this search
    hash_key = state.get_hash()
    update_hash = True
    hash_available = False
    if hash_key in htable:
        hash_available = True
        hash_state = htable.get(hash_key)
        if hash_state.get_forward_depth() >= max_depth - depth:
            # Stored entry was searched at least as deep: reuse its value
            return hash_state.get_value()
        else:
            update_hash = False
    if depth == max_depth:
        return eval_fn(state, game)
    v = np.inf
    if hash_available:
        all_actions = hash_state.get_actions()
    else:
        all_actions = game.produce_actions(state)
        np.random.shuffle(all_actions)
    for a in all_actions:
        if game.max_time - (time.time() - time_start) <= 0.1:
            return False
        new_state = State(second_init_args=(state, a[0], a[1], a[2], a[3], a[4]))
        # (time_start was missing from this call in the original; max_value
        # needs it for its own time checks)
        v = min(v,
                max_value(new_state, game, alpha, beta, depth + 1, max_depth,
                          htable, time_start))
        if update_hash or not hash_available:
            key = new_state.get_hash()
            htable[key] = HashEntry(key, v, all_actions, max_depth, depth)
        if v <= alpha:
            return v
        beta = min(beta, v)
    return v
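# NOTE: HashEntry is referenced but not defined in this listing. A minimal
# sketch matching the accessors used above (the field layout is an
# assumption inferred from the constructor call and the depth comparison):

class HashEntry:
    """Transposition-table entry for the depth-aware variant of min_value."""

    def __init__(self, key, value, actions, max_depth, depth):
        self.key = key
        self.value = value
        self.actions = actions
        # How many plies below this node were actually searched
        self.forward_depth = max_depth - depth

    def get_value(self):
        return self.value

    def get_actions(self):
        return self.actions

    def get_forward_depth(self):
        return self.forward_depth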
def choose_action(state, game, state_hash_table):
    """
    Search for the best action using minimax with alpha-beta pruning,
    iteratively increasing the maximum depth. It stops only when the
    available time is almost up.
    """
    time_start = time.time()
    best_score_end = -np.inf
    beta = np.inf
    best_action = None
    best_action_end = None
    max_depth = 2
    num_state_visited = [0]
    flag = False
    # Getting all possible actions given state
    all_actions = game.produce_actions(state)
    while time.time() - time_start < game.max_time:
        cont = 0
        best_score = -np.inf
        for a in all_actions:
            # (this call targets a sequential min_value variant that also
            # tracks num_state_visited, not the threaded one above)
            v = min_value(
                State(second_init_args=(state, a[0], a[1], a[2], a[3], a[4])),
                game, best_score, beta, 1, max_depth, time_start,
                state_hash_table, num_state_visited)
            cont += 1
            if v > best_score:
                best_score = v
                best_action = a
            if time.time() - time_start >= game.max_time:
                break
        # If search at current maximum depth is finished, update best action
        if cont == len(all_actions):
            best_score_end = best_score
            best_action_end = best_action
            flag = True
            print("Depth reached:", max_depth)
        elif flag:
            print("Depth reached:", max_depth - 1)
        else:
            print("Minimum depth not reached")
        max_depth += 1  # Iteratively increasing depth
    print(num_state_visited[0], "states visited in",
          time.time() - time_start, "seconds.")
    return best_action_end, best_score_end
def alpha_beta_cutoff_search(state, game, max_depth, htable, time_start):
    """Search game to determine best action; use alpha-beta pruning.
    This version cuts off search and uses an evaluation function."""
    best_score = -np.inf
    beta = np.inf
    best_action = None
    action_list = game.produce_actions(state)
    np.random.shuffle(action_list)
    for a in action_list:
        new_state = State(second_init_args=(state, a[0], a[1], a[2], a[3], a[4]))
        v = min_value(new_state, game, best_score, beta, 1, max_depth, htable,
                      time_start)
        if v > best_score:
            best_score = v
            best_action = a
    return best_action, best_score, max_depth
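# NOTE: a minimal sketch of how alpha_beta_cutoff_search could be driven with
# iterative deepening, and of the eval_fn it bottoms out in. Both bodies are
# assumptions built from the call sites (the driver name iterative_deepening
# is hypothetical; min_value returns False when the time budget is nearly
# exhausted, so an aborted depth is discarded here):

def eval_fn(state, game):
    """Leaf evaluation: delegate to the state's weighted heuristic."""
    return state.compute_heuristic(game.weights, game.color)


def iterative_deepening(state, game, htable):
    time_start = time.time()
    best_action, best_score = None, -np.inf
    max_depth = 2
    while game.max_time - (time.time() - time_start) > 0.1:
        action, score, _ = alpha_beta_cutoff_search(state, game, max_depth,
                                                    htable, time_start)
        if game.max_time - (time.time() - time_start) > 0.1:
            # Depth completed within budget: keep its result
            best_action, best_score = action, score
        max_depth += 1
    return best_action, best_score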
def choose_action(state, game, state_hash_table):
    """
    Search for the best action using minimax with alpha-beta pruning,
    iteratively increasing the maximum depth. It stops only when the
    available time is almost up.
    """
    time_start = time.time()
    # Getting all possible actions given state
    all_actions = game.produce_actions(state)
    best_score = [np.inf]
    best_score_end = np.inf
    alpha = [-np.inf]
    best_action = None
    best_action_end = None
    max_depth = 2
    flag = False
    lock_m = Lock()
    return_values = [-np.inf for _ in range(len(all_actions))]
    # Note: this root-parallel variant inverts the usual sign convention.
    # The root minimizes over max_value results (wins are scored
    # -MAX_VAL_HEURISTIC inside max_value below).
    while time.time() - time_start < game.max_time:
        thread_list = []
        if len(all_actions) > 0:
            # Evaluate the first action synchronously to seed best_score
            a = all_actions[0]
            thread_list.append(
                Thread(target=max_value,
                       args=(State(second_init_args=(state, a[0], a[1], a[2], a[3], a[4])),
                             game, alpha, best_score, 1, max_depth, time_start,
                             state_hash_table, 0, return_values, lock_m)))
            thread_list[0].start()
            thread_list[0].join()
            if return_values[0] < best_score[0]:
                best_score[0] = return_values[0]
                best_action = a
            # Evaluate the remaining actions in parallel, one thread each
            for i in range(len(all_actions[1:])):
                a = all_actions[i + 1]
                thread_list.append(
                    Thread(target=max_value,
                           args=(State(second_init_args=(state, a[0], a[1], a[2], a[3], a[4])),
                                 game, alpha, best_score, 1, max_depth,
                                 time_start, state_hash_table, i + 1,
                                 return_values, lock_m)))
                thread_list[i + 1].start()
            # Poll finished threads, folding their results into best_score
            flag_t = True
            flag_time = False
            while flag_t and not flag_time:
                flag_t = False
                for i in range(len(all_actions)):
                    if not thread_list[i].is_alive():
                        lock_m.acquire()
                        if return_values[i] < best_score[0]:
                            best_score[0] = return_values[i]
                            best_action = all_actions[i]
                        lock_m.release()
                    else:
                        flag_t = True
                if time.time() - time_start >= game.max_time:
                    flag_time = True
            for i in range(len(thread_list)):
                thread_list[i].join()
            # If search at current maximum depth is finished, update best action
            if not flag_time:
                best_score_end = best_score[0]
                best_action_end = best_action
                flag = True
                print("Depth reached:", max_depth)
            elif flag:
                print("Depth reached:", max_depth - 1)
            else:
                print("Minimum depth not reached")
            max_depth += 1  # Iteratively increasing depth
    return best_action_end, best_score_end
def max_value(state, game, alpha, beta, depth, max_depth, time_start,
              state_hash_table, id_m, v, lock_p):
    lock_2.acquire()
    state_hash = state.get_hash()
    hash_result = state_hash_table.get(state_hash)
    lock_2.release()
    all_actions = None
    if hash_result is not None:
        if hash_result['used'] == 1:
            v[id_m] = 0
            return
        if hash_result.get('all_actions') is not None:
            all_actions = hash_result.get('all_actions')
    if cutoff_test(depth, max_depth, game.max_time, time_start):
        # If reached maximum depth or total time
        if hash_result is not None:
            # If state previously evaluated don't recompute heuristic
            v[id_m] = hash_result["value"]
            return
        # If state not previously evaluated
        value = state.compute_heuristic(game.weights, game.color)
        lock_2.acquire()
        # Add state and value to hash table
        add_to_hash(state_hash_table, state_hash, value, all_actions)
        lock_2.release()
        v[id_m] = value
        return
    tmp_victory = state.check_victory()
    if tmp_victory == -1 and game.color == "BLACK":
        # King captured and black player -> Win
        v[id_m] = -MAX_VAL_HEURISTIC
        return
    elif tmp_victory == -1 and game.color == "WHITE":
        # King captured and white player -> Lose
        v[id_m] = MAX_VAL_HEURISTIC
        return
    elif tmp_victory == 1 and game.color == "BLACK":
        # King escaped and black player -> Lose
        v[id_m] = MAX_VAL_HEURISTIC
        return
    elif tmp_victory == 1 and game.color == "WHITE":
        # King escaped and white player -> Win
        v[id_m] = -MAX_VAL_HEURISTIC
        return
    # Body
    if all_actions is None:
        all_actions = game.produce_actions(state)
        if hash_result is not None:
            add_to_hash(state_hash_table, state_hash, hash_result['value'],
                        all_actions)
    if len(all_actions) == 0:
        # (the original returned the value directly, but this function
        # communicates through the shared list v)
        v[id_m] = -MAX_VAL_HEURISTIC
        return
    return_values = [np.inf for _ in range(len(all_actions))]
    lock_p.acquire()
    best_score = [alpha[0]]
    lock_p.release()
    lock_m = Lock()
    # Evaluate the first action synchronously to seed best_score
    a = all_actions[0]
    thread = Thread(target=min_value,
                    args=(State(second_init_args=(state, a[0], a[1], a[2], a[3], a[4])),
                          game, best_score, beta, depth + 1, max_depth,
                          time_start, state_hash_table, 0, return_values,
                          lock_m))
    thread.start()
    thread.join()
    if return_values[0] > best_score[0]:
        best_score[0] = return_values[0]

    def run_batch(indices):
        # Launch one thread per action index, then poll until all finish,
        # folding each result into best_score; returns the cutoff value if a
        # beta cutoff occurs, else None.
        thread_list = []
        for idx in indices:
            a = all_actions[idx]
            lock_p.acquire()
            if alpha[0] > best_score[0]:
                best_score[0] = alpha[0]
            lock_p.release()
            t = Thread(target=min_value,
                       args=(State(second_init_args=(state, a[0], a[1], a[2], a[3], a[4])),
                             game, best_score, beta, depth + 1, max_depth,
                             time_start, state_hash_table, idx, return_values,
                             lock_m))
            thread_list.append(t)
            t.start()
        flag_t = True
        while flag_t:
            flag_t = False
            for i, idx in enumerate(indices):
                if not thread_list[i].is_alive():
                    lock_m.acquire()
                    if return_values[idx] >= beta[0]:
                        cutoff = return_values[idx]
                        lock_m.release()
                        return cutoff
                    best_score[0] = max(best_score[0], return_values[idx])
                    lock_m.release()
                else:
                    flag_t = True
        for t in thread_list:
            t.join()
        return None

    # Evaluate the remaining actions in two parallel batches. The original
    # indexing re-used index 0 and skipped the middle action; fixed here so
    # that each action is searched exactly once.
    half = max(int(len(all_actions) / 2), 1)
    cutoff = run_batch(list(range(1, half)))
    if cutoff is None:
        cutoff = run_batch(list(range(half, len(all_actions))))
    if cutoff is not None:
        v[id_m] = cutoff
        return
    # Report the max over all children (the original returned the last
    # thread's value, discarding the best result)
    v[id_m] = best_score[0]
    return
def choose_action(state, game, state_hash_table):
    """
    Search for the best action using minimax with alpha-beta pruning,
    iteratively increasing the maximum depth. It stops only when the
    available time is almost up.
    """
    time_start = time.time()
    max_depth = 2
    flag = False
    best_score_end = np.inf
    best_action_end = None
    best_action = None
    beta = np.inf
    flag_time = [False]
    best_scores = []
    # Getting all possible actions given state
    all_actions = game.produce_actions(state)
    if len(all_actions) > 0:
        thread_list = []
        active = []
        action = []
        while time.time() - time_start < game.max_time:
            best_score = [-np.inf]
            for i in range(len(best_scores)):
                best_scores[i] = -np.inf
            for j in range(len(all_actions)):
                a = all_actions[j]
                if j == 0:
                    # First action searched in the main thread to seed best_score
                    v = min_value(
                        State(second_init_args=(state, a[0], a[1], a[2], a[3], a[4])),
                        game, best_score[0], beta, 1, max_depth, time_start,
                        state_hash_table)
                    if v > best_score[0]:
                        best_score[0] = v
                        best_action = a
                else:
                    if len(thread_list) < N_THREAD:
                        # Grow the worker pool up to N_THREAD threads
                        active.append(False)
                        action.append(a)
                        best_scores.append(-np.inf)  # (originally np.inf,
                                                     # which made every fresh
                                                     # slot win the comparison)
                        thread_list.append(
                            Thread(target=search_thread,
                                   args=(state, action, game, best_score, beta,
                                         1, max_depth, time_start,
                                         state_hash_table, flag_time, active,
                                         len(thread_list), best_scores)))
                        thread_list[len(thread_list) - 1].start()
                    else:
                        # Pool is full: wait for an idle worker, collect its
                        # last result, and hand it the next action
                        flag_assign = True
                        while flag_assign:
                            for i in range(len(thread_list)):
                                lock_bool.acquire()
                                tmp = active[i]
                                lock_bool.release()
                                if not tmp:
                                    lock_value.acquire()
                                    if best_scores[i] > best_score[0]:
                                        best_score[0] = best_scores[i]
                                        best_action = action[i]
                                    lock_value.release()
                                    action[i] = a
                                    lock_bool.acquire()
                                    active[i] = True
                                    lock_bool.release()
                                    flag_assign = False
                                    break
                            if time.time() - time_start >= game.max_time:
                                lock_time.acquire()
                                flag_time[0] = True
                                lock_time.release()
                                break
            # Wait for all workers to go idle (or for time to run out)
            flag_t = True
            while flag_t and not flag_time[0]:
                flag_t = False
                # (originally range(N_THREAD), which over-indexes when the
                # pool is smaller than N_THREAD)
                for i in range(len(active)):
                    lock_bool.acquire()
                    tmp = active[i]
                    lock_bool.release()
                    if tmp:
                        flag_t = True
                if time.time() - time_start >= game.max_time:
                    lock_time.acquire()
                    flag_time[0] = True
                    lock_time.release()
            # If search at current maximum depth is finished, update best action
            tmp_time = flag_time[0]
            if not tmp_time:
                for i in range(len(thread_list)):
                    if best_scores[i] > best_score[0]:
                        best_score[0] = best_scores[i]
                        best_action = action[i]
                best_score_end = best_score[0]
                best_action_end = best_action
                flag = True
                print("Depth reached:", max_depth)
            elif flag:
                print("Depth reached:", max_depth - 1)
            else:
                print("Minimum depth not reached")
            max_depth += 1  # Iteratively increasing depth
        # Signal workers to exit before joining (otherwise they spin forever
        # waiting for an assignment and join() deadlocks)
        lock_time.acquire()
        flag_time[0] = True
        lock_time.release()
        for i in range(len(thread_list)):
            thread_list[i].join()
    return best_action_end, best_score_end
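# NOTE: this listing collects three alternative implementations of
# choose_action/min_value/max_value: a purely sequential searcher, a
# root-parallel variant that spawns one thread per root action, and a
# worker-pool variant built on search_thread. They appear to come from
# separate modules; the duplicate names would shadow one another if the
# functions were placed in a single file.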