# Imports assumed by the code in this section (gogame/govars come from gym-go):
from copy import copy
import random
import numpy as np
import pyglet
import tensorflow as tf
from gym_go import gogame, govars


def predict(go_env, info_env, level, player, enemy=False):
    go_env_pred = copy(go_env)
    info = info_env
    strategy = choose_strategy()
    if strategy == 'P':
        #print("{ Estrategia: pasar turno :c }")
        return 49
    invalid_moves = get_invalidMoves(info["invalid_moves"])
    #playsinthefuture = np.count_nonzero(info["invalid_moves"] == 0) - 1
    nextPlays = seeInFurture(go_env_pred, invalid_moves, level, player, strategy)
    if not enemy:
        print(nextPlays.flatten())
        print("El jugador "+player+" ha usado la estrategia "+strategy)
    black_area, white_area = gogame.areas(go_env_pred.state_)
    if player == "white":
        if nextPlays.size != 0 and nextPlays[0, 1] > 0:  # or nextPlays[0, 2] < black_area:
            # If more than one move promises the same max score, pick one at random
            play = random.randrange(len(nextPlays))
            return int(nextPlays[play, 0])
        else:
            return 49
    else:
        if nextPlays.size != 0 and nextPlays[0, 1] > 0:  # or nextPlays[0, 2] < black_area:
            # If more than one move promises the same max score, pick one at random
            play = random.randrange(len(nextPlays))
            return int(nextPlays[play, 0])
        else:
            return 49
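The helpers choose_strategy, get_invalidMoves and countingPoints are used throughout this section but their definitions are not included here. As a reading aid, a plausible get_invalidMoves, assuming that info["invalid_moves"] is the 0/1 mask provided by the gym-go environment (1 marking an illegal action), could look like the sketch below; choose_strategy is only known, from this code, to return a strategy code where 'P' means passing the turn.

def get_invalidMoves(invalid_mask):
    # Hypothetical helper: return the indices whose mask value is 1 (illegal moves),
    # so that "counter not in invalidPlays" filters them out in seeInFurture.
    return np.nonzero(np.asarray(invalid_mask).flatten() == 1)[0]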
def draw_info(batch, window_width, window_height, upper_grid_coord, state):
    turn = gogame.turn(state)
    turn_str = 'B' if turn == govars.BLACK else 'W'
    prev_player_passed = gogame.prev_player_passed(state)
    game_ended = gogame.game_ended(state)
    info_label = "Turn: {}\nPassed: {}\nGame: {}".format(
        turn_str, prev_player_passed, "OVER" if game_ended else "ONGOING")

    pyglet.text.Label(info_label,
                      font_name='Helvetica',
                      font_size=11,
                      x=window_width - 20, y=window_height - 20,
                      anchor_x='right', anchor_y='top',
                      color=(0, 0, 0, 192),
                      batch=batch, width=window_width / 2,
                      align='right', multiline=True)

    # Areas
    black_area, white_area = gogame.areas(state)
    pyglet.text.Label("{}B | {}W".format(black_area, white_area),
                      font_name='Helvetica',
                      font_size=16,
                      x=window_width / 2, y=upper_grid_coord + 80,
                      anchor_x='center',
                      color=(0, 0, 0, 192),
                      batch=batch, width=window_width,
                      align='center')
def predict(go_env, info_env, level, player, n_plays, enemy=False, smart=False):
    go_env_pred = copy(go_env)
    info = info_env
    strategy = choose_strategy()
    if strategy == 'P':
        return 49  # Pass the turn
    invalid_moves = get_invalidMoves(info["invalid_moves"])
    nextPlays = seeInFurture(go_env_pred, invalid_moves, level, player,
                             strategy, n_plays, smart)
    if not enemy and not smart:
        # Uncomment for more detail about the moves and strategies
        #print(nextPlays.flatten())
        #print("El jugador "+player+" ha usado la estrategia "+strategy)
        pass
    black_area, white_area = gogame.areas(go_env_pred.state_)
    if player == "white":
        if nextPlays.size != 0 and nextPlays[0, 1] > 0:  # or nextPlays[0, 2] < black_area:
            # If more than one move promises the same max score, pick one at random
            play = random.randrange(len(nextPlays))
            return int(nextPlays[play, 0])
        else:
            return 49
    else:
        if nextPlays.size != 0 and nextPlays[0, 1] > 0:  # or nextPlays[0, 2] < black_area:
            # If more than one move promises the same max score, pick one at random
            play = random.randrange(len(nextPlays))
            return int(nextPlays[play, 0])
        else:
            return 49
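For context, here is a minimal self-play sketch showing how this predict could be driven from a gym-go environment. The environment id and keyword arguments come from the GymGo package; seeding the first info dict with an all-zeros mask (every action, including pass, legal at the start) is an assumption about the mask's shape and meaning.

import gym
import numpy as np

go_env = gym.make('gym_go:go-v0', size=7, komi=0, reward_method='heuristic')
go_env.reset()
info = {"invalid_moves": np.zeros(50)}  # assumption: every action starts out legal
player, done = "black", False
while not done:
    action = predict(go_env, info, 2, player, 3, smart=False)  # depth 2, sample 3 moves per node
    state, reward, done, info = go_env.step(action)
    player = "white" if player == "black" else "black"
go_env.render(mode="terminal")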
def reward(self):
    '''
    Return the reward based on reward_method.

    heuristic: black total area - white total area, corrected by komi.
    real: 0 for an in-game move, 1 for winning, 0 for losing, 0.5 for a draw,
        all from the black player's perspective. Winning and losing are based
        on the Area rule, also known as Trump-Taylor scoring.
        Area rule definition: https://en.wikipedia.org/wiki/Rules_of_Go#End
    '''
    if self.reward_method == RewardMethod.REAL:
        return self.winner()

    elif self.reward_method == RewardMethod.HEURISTIC:
        black_area, white_area = gogame.areas(self.state_)
        area_difference = black_area - white_area
        komi_correction = area_difference - self.komi
        if self.game_ended():
            return (1 if komi_correction > 0 else -1) * self.size ** 2
        return komi_correction
    else:
        raise Exception("Unknown Reward Method")
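A quick worked example of the HEURISTIC branch, assuming a 7x7 board (size**2 = 49) and komi = 0:

black_area, white_area = 30, 19
komi_correction = (black_area - white_area) - 0   # 11
# mid-game reward: 11; once the game ends the reward becomes 1 * 7**2 = 49,
# because komi_correction > 0 (black wins on area)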
def seeInFurture(go_env_pred, invalidPlays, lvls, player, strategy, first=True):
    counter = 0
    playPoints = np.empty([0, 2])
    maxPoints = 0
    tmpPoints = 0
    parentMove = np.empty([0, 0])
    if player == "white":
        enemy = "black"
    else:
        enemy = "white"

    for counter in range(49):
        if counter not in invalidPlays:
            # Score the moves of level n
            if len(invalidPlays) <= 1:
                # The very first move is worth 1 pt for both players, not 49
                pts = 1.0
            else:
                tmp_env = copy(go_env_pred)
                prev_black_area, prev_white_area = gogame.areas(tmp_env.state_)
                tmp_env.step(counter)
                black_area, white_area = gogame.areas(tmp_env.state_)
                # Keep the best scores; if lvl != 1, build a list of promising moves
                pts = countingPoints(strategy, prev_black_area, prev_white_area,
                                     black_area, white_area, player)  # area gained + area taken
            if lvls == 1:
                # Deepest level: build the list of promising moves and set the max score
                if pts > maxPoints:
                    maxPoints = pts
                    playPoints = np.array([[counter, pts]])
                elif pts == maxPoints:
                    playPoints = np.append(playPoints, [[counter, pts]], axis=0)
            else:
                # Not level 1: build the list of promising moves to analyse and set the max score
                if pts > tmpPoints:
                    tmpPoints = pts
                    parentMove = np.array([counter])
                elif pts == tmpPoints:
                    parentMove = np.append(parentMove, counter)

    lvls = lvls - 1  # Go one level down the tree
    if first and tmpPoints == 0:
        # If the immediate moves all score 0, cancel the prediction
        parentMove = np.empty([0, 0])
    if lvls:  # Reaching 0 means we already went through the last level (level 1)
        tmp_max = 0
        for i in parentMove:
            # Recursive call to seeInFurture to get the max score of that branch
            tmp_env = copy(go_env_pred)
            state, reward, done, info = tmp_env.step(int(i))  # Player's turn
            enemy_action = predict(tmp_env, info, 1, enemy, True)  # Predict the opponent's strategy and move
            state, reward, done, info = tmp_env.step(enemy_action)  # The opponent passes (assumption) <-- uncertainty!!! o.o
            tmp_plays = get_invalidMoves(info["invalid_moves"])
            tmp_max = seeInFurture(tmp_env, tmp_plays, lvls, player, strategy, False)  # Max score of the level below
            if tmp_max > maxPoints and not first:
                maxPoints = tmp_max
            elif first:
                # Main branch (the actual next move): store the move and the branch's max score
                if tmp_max == maxPoints:
                    # Moves with equal scores are appended to the list
                    if not maxPoints:
                        # If the max score one level down is 0, use the current level's max score
                        tmp_max = tmpPoints
                    playPoints = np.append(playPoints, [[i, tmp_max]], axis=0)
                elif tmp_max > maxPoints:
                    # A higher score resets the list and becomes the new max
                    playPoints = np.array([[i, tmp_max]])
                    maxPoints = tmp_max
    if not maxPoints:
        maxPoints = tmpPoints
    if first:
        # At the first level, return the moves together with the max score of their children
        maxPoints = playPoints
    return maxPoints
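countingPoints is not shown in this section. Going only by the "area gained + area taken" comment above, a plausible version could be the one below; how (or whether) the strategy argument weights the two terms is an assumption.

def countingPoints(strategy, prev_black_area, prev_white_area,
                   black_area, white_area, player):
    # Hypothetical scoring: own area gained plus opponent area removed by the move
    # (the strategy code is ignored in this sketch).
    if player == "white":
        gained = white_area - prev_white_area
        taken = prev_black_area - black_area
    else:
        gained = black_area - prev_black_area
        taken = prev_white_area - white_area
    return max(gained, 0) + max(taken, 0)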
def seeInFurture(go_env_pred, invalidPlays, lvls, player, strategy, n_plays,
                 smart, first=True):
    counter = 0
    playPoints = np.empty([0, 2])
    maxPoints = 0
    tmpPoints = 0
    parentMove = np.empty([0, 0])
    if player == "white":
        enemy = "black"
    else:
        enemy = "white"

    # Collect the valid moves
    valid_Plays = np.empty([0, 0])
    for counter in range(49):
        if counter not in invalidPlays:
            valid_Plays = np.append(valid_Plays, counter)

    # Sample moves at random among the available valid moves
    for _ in range(n_plays):
        if n_plays > len(valid_Plays):
            break
        rnd = random.randrange(len(valid_Plays))
        counter = int(valid_Plays[rnd])
        # np.delete returns a new array, so reassign to actually drop the sampled move
        valid_Plays = np.delete(valid_Plays, rnd)
        if len(invalidPlays) <= 1:
            # The very first move is worth 1 pt for both players, not 49
            pts = 1.0
        else:
            if smart:
                tmp_env = copy(go_env_pred)
                tmp_env.step(counter)
                blk = tmp_env.state_[0].flatten()
                wht = tmp_env.state_[1].flatten()
                tablero = blk
                tablero = np.where(tablero == 1, -1, 0)
                for i in range(len(wht)):
                    if int(wht[i]) == 1:
                        np.put(tablero, i, 1)
                tablero = tablero.reshape(1, 7, 7, 1)
                # Probability of winning after making this move
                if player == "white":
                    pts = model.predict(tf.convert_to_tensor(tablero))[0][0]
                else:
                    pts = model.predict(tf.convert_to_tensor(tablero))[0][1]
                if pts < 0.25:  # Winning is unlikely, check the draw probability
                    pts = model.predict(tf.convert_to_tensor(tablero))[0][2]
                    if pts < 0.3:  # Drawing is also unlikely
                        pts = 0  # With low chances of winning or drawing, pass the turn
            else:
                tmp_env = copy(go_env_pred)
                prev_black_area, prev_white_area = gogame.areas(tmp_env.state_)
                tmp_env.step(counter)
                black_area, white_area = gogame.areas(tmp_env.state_)
                # Keep the best scores; if lvl != 1, build a list of promising moves
                pts = countingPoints(strategy, prev_black_area, prev_white_area,
                                     black_area, white_area, player)  # area gained + area taken
        if lvls == 1:
            # Deepest level: build the list of promising moves and set the max score
            if pts > maxPoints:
                maxPoints = pts
                playPoints = np.array([[counter, pts]])
            elif pts == maxPoints:
                playPoints = np.append(playPoints, [[counter, pts]], axis=0)
        else:
            # Not level 1: build the list of promising moves to analyse and set the max score
            if pts > tmpPoints:
                tmpPoints = pts
                parentMove = np.array([counter])
            elif pts == tmpPoints:
                parentMove = np.append(parentMove, counter)

    lvls = lvls - 1  # Go one level down the tree
    if first and tmpPoints == 0:
        # If the immediate moves all score 0, cancel the prediction
        parentMove = np.empty([0, 0])
    if lvls:  # Reaching 0 means we already went through the last level (level 1)
        tmp_max = 0
        for i in parentMove:
            # Recursive call to seeInFurture to get the max score of that branch
            tmp_env = copy(go_env_pred)
            state, reward, done, info = tmp_env.step(int(i))  # Player's turn
            #print(state, "-", reward, "-", done, "-", info)
            enemy_action = predict(tmp_env, info, 1, enemy, 3, True)  # Predict the opponent's strategy and move
            state, reward, done, info = tmp_env.step(enemy_action)  # The opponent passes (assumption) <-- uncertainty!!! o.o
            tmp_plays = get_invalidMoves(info["invalid_moves"])
            tmp_max = seeInFurture(tmp_env, tmp_plays, lvls, player, strategy,
                                   3, smart, False)  # Max score of the level below
            if tmp_max > maxPoints and not first:
                maxPoints = tmp_max
            elif first:
                # Main branch (the actual next move): store the move and the branch's max score
                if tmp_max == maxPoints:
                    # Moves with equal scores are appended to the list
                    if not maxPoints:
                        # If the max score one level down is 0, use the current level's max score
                        tmp_max = tmpPoints
                    playPoints = np.append(playPoints, [[i, tmp_max]], axis=0)
                elif tmp_max > maxPoints:
                    # A higher score resets the list and becomes the new max
                    playPoints = np.array([[i, tmp_max]])
                    maxPoints = tmp_max
    if not maxPoints:
        maxPoints = tmpPoints
    if first:
        # At the first level, return the moves together with the max score of their children
        maxPoints = playPoints
    return maxPoints
blk = go_env.state_[0].flatten()
wht = go_env.state_[1].flatten()
tablero = blk
tablero = np.where(tablero == 1, "-1", 0)  # black stones -> "-1", empty -> "0"
for i in range(len(wht)):
    if int(wht[i]) == 1:
        np.put(tablero, i, "1")  # white stones -> "1"
tablero = ' '.join(tablero)
print(tablero)

black_area, white_area = gogame.areas(go_env.state_)
if white_area > black_area:
    out = 0  # White wins
elif white_area < black_area:
    out = 1  # Black wins
else:
    out = 2  # Draw

dataset = open("dataset.csv", "a")
dataset.write(str(out) + "," + tablero + "\n")
dataset.close()

if n_stages != 0:
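The network behind model.predict is not part of this section. A minimal sketch that matches how it is used above (a (1, 7, 7, 1) board tensor and a three-way softmax read as [0] = white wins, [1] = black wins, [2] = draw) and the dataset.csv rows written by the snippet above might look like the following; the architecture, optimizer and training settings are assumptions.

import numpy as np
import tensorflow as tf

def load_dataset(path="dataset.csv"):
    boards, labels = [], []
    with open(path) as f:
        for line in f:
            if not line.strip():
                continue
            label, cells = line.strip().split(",")
            boards.append(np.array(cells.split(" "), dtype=np.float32).reshape(7, 7, 1))
            labels.append(int(label))  # 0 = white wins, 1 = black wins, 2 = draw
    return np.stack(boards), np.array(labels)

model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, 3, activation="relu", input_shape=(7, 7, 1)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(3, activation="softmax"),  # [white wins, black wins, draw]
])
model.compile(optimizer="adam",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])

# X, y = load_dataset()
# model.fit(X, y, epochs=20, validation_split=0.1)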