def get(self):
    """Write the rendered ``home.html`` template to the response as HTML."""
    self.response.headers['Content-Type'] = 'text/html'
    # Context handed to the template; only the chart image path is needed.
    context = {"chart_src": "../static/images/icon1.jpg"}
    page = render_str("home.html", context)
    self.response.write(page)
def render(self):
    """Render this instance with ``post.html``.

    Counts the ``Comment`` and ``Like`` entities whose ``post`` equals
    ``self.key`` and passes them to the template as human-readable labels
    such as ``"1 comment"`` or ``"3 likes"``, together with the entity
    returned by ``self.user.get()`` as ``author``.
    """
    def _label(count, singular):
        # Exactly one item keeps the singular noun; everything else gets "s".
        noun = singular if count == 1 else singular + "s"
        return "{} {}".format(count, noun)

    # replace new line characters with breaks
    self.__render_text = self.content.replace('\n', '<br>')

    comments = Comment.query(Comment.post == self.key).order(-Comment.created)
    comment_count = _label(comments.count(), "comment")

    likes = Like.query(Like.post == self.key)
    like_count = _label(likes.count(), "like")

    author = self.user.get()
    return utils.render_str('post.html',
                            post=self,
                            author=author,
                            comment_count=comment_count,
                            like_count=like_count)
def render(self, username):
    """Render this instance with ``comment.html``.

    Before rendering, stores on the instance: ``_isAuthor`` (whether
    *username* equals ``self.author.name``), ``_render_text`` (content with
    newlines turned into ``<br>``) and ``_key`` (the numeric key id).
    """
    written_by_viewer = username == self.author.name
    self._isAuthor = True if written_by_viewer else False

    html_text = self.content.replace('\n', '<br>')
    self._render_text = html_text
    self._key = self.key().id()
    return utils.render_str("comment.html", c=self)
def render(self, user=None):
    """Render this instance with ``comment.html``.

    *user* is forwarded to the template unchanged; the entity returned by
    ``self.user.get()`` is passed as ``comment_user``.
    """
    # Newlines in the stored content become HTML line breaks.
    html_text = self.content.replace('\n', '<br>')
    self.__render_text = html_text

    author = self.user.get()
    return utils.render_str('comment.html',
                            comment=self,
                            comment_user=author,
                            user=user)
def showPermalink(self, username):
    """Render the permalink view of this instance.

    When *username* matches ``self.author.name`` the ``editpost.html``
    form is rendered pre-filled with the subject and content; any other
    viewer gets the plain ``post.html`` rendering.
    """
    self._render_text = self.content.replace('\n', '<br>')
    self._key = self.key().id()
    self._can_edit = False
    self._is_permalink = True

    # Anyone but the author only gets the read-only view.
    if self.author.name != username:
        return utils.render_str("post.html", p=self)

    self._can_edit = True
    return utils.render_str("editpost.html",
                            title="Edit post",
                            username=username,
                            author=self.author.name,
                            subject=self.subject,
                            content=self._render_text,
                            key=self._key,
                            error="")
def SOAPRequest(self, operation, input_parameters=None):
    """POST the SOAP envelope for *operation* to the ideone service.

    The envelope is rendered from ``soap/<operation>.xml`` using
    ``self.credentials`` merged with *input_parameters* (the latter wins
    on key clashes).

    Args:
        operation: name of the SOAP operation / template file stem.
        input_parameters: optional mapping of extra template parameters.

    Returns:
        ``{'error': 'timeout'}`` if the fetch raises; otherwise ``None``
        in the block as visible here.
    """
    # A ``{}`` default would be shared between calls; use None instead.
    parameters = dict(self.credentials)
    parameters.update(input_parameters or {})

    request = utils.render_str("soap/" + operation + ".xml", parameters)

    # NOTE(review): the visible block never uses ``response`` after the
    # fetch -- presumably the parsing code follows elsewhere; confirm.
    response = None
    try:
        response = urlfetch.fetch(
            url='http://ideone.com/api/1/service',
            method=urlfetch.POST,
            payload=request,
            deadline=10,
            # The header name must not carry a trailing colon.
            headers={'Content-Type': 'text/xml; charset=UTF-8'})
    except Exception:
        # Best effort: every fetch failure is reported as a timeout.
        return {'error': 'timeout'}
def render(self, username):
    """ Post class render method

    Stores ``_render_text`` (content with newlines as ``<br>``), ``_key``
    (numeric key id) and ``_can_edit`` (True only when *username* equals
    ``self.author.name``) on the instance, then renders ``post.html``.
    """
    self._render_text = self.content.replace('\n', '<br>')
    self._key = self.key().id()
    # The original also built a db.Key here that was never used; dropped.
    self._can_edit = False
    if self.author.name == username:
        self._can_edit = True
    return utils.render_str("post.html", p=self)
def main(): turn = 0 enemy_turn = 1 board = np.zeros([17, 17]) root_id = (0,) win_index = 0 action_index = None GameInfo.game_board = board # 게임이 시작된 후 끝날때까지 진행되는 메인 코드 while win_index == 0: utils.render_str(board) # 콘솔에 보드 상태 출력 # 다음 액션에 대한 입력을 대기 action, action_index = evaluator.get_action(root_id, board, turn, enemy_turn) # 액션 실행 if turn != enemy_turn: root_id = evaluator.player.root_id + action + (action_index,) else:
def render_post(self):
    """Render this instance with ``post.html``.

    Caches the numeric key id as ``_id`` and the ``<br>``-formatted
    content as ``_render_text`` on the instance first.
    """
    entity_id = self.key().id()
    self._id = entity_id

    html_text = self.content.replace('\n', '<br>')
    self._render_text = html_text

    return render_str("post.html", post=self)
def main():
    """Play N_MATCH games between the player and enemy agents.

    After every move the board, move index, result and turn are copied
    into ``game_info`` and the agents' visit/policy/value data into
    ``player_agent_info`` / ``enemy_agent_info`` (described by the original
    comments as being passed to a web server). Win/draw counts and ELO
    ratings are updated and printed after each game.
    """
    # Set up the agents
    evaluator.set_agents(player_model_path, enemy_model_path,
                         monitor_model_path)

    # Pass each agent's info to the web server
    player_agent_info.agent = evaluator.player
    enemy_agent_info.agent = evaluator.enemy

    env = evaluator.return_env()  # env.env_small.GameState()

    result = {'Player': 0, 'Enemy': 0, 'Draw': 0}  # wins, losses, draws
    turn = 0  # player turn
    enemy_turn = 1  # enemy turn
    player_elo = 1500  # player rating
    enemy_elo = 1500  # enemy rating

    # Pass the enemy-turn variable and the game status to the web server
    game_info.enemy_turn = enemy_turn
    game_info.game_status = 0

    # Print the player and enemy ratings
    print('Player ELO: {:.0f}, Enemy ELO: {:.0f}'.format(
        player_elo, enemy_elo))

    # Run N_MATCH matches
    for i in range(N_MATCH):
        # Create the board as a 2-D array of BOARD_SIZE x BOARD_SIZE
        board = np.zeros([BOARD_SIZE, BOARD_SIZE])
        root_id = (0, )  # records the move positions played so far
        # game result (0: in progress, 1: black wins, 2: white wins, 3: draw)
        win_index = 0
        action_index = None  # move position of the current turn

        # Pass the board to the web server
        game_info.game_board = board

        # The side that moves first alternates every game
        if i % 2 == 0:
            print('Player Color: Black')
        else:
            print('Player Color: White')

        # 0:Running 1:Player Win, 2: Enemy Win 3: Draw
        game_info.game_status = 0

        # Main game loop; runs until the game is decided
        while win_index == 0:
            # Print the board to the console as text
            utils.render_str(board, BOARD_SIZE, action_index)

            # Calls agents.ZeroAgent().get_pv()
            # Receives the policy (win likelihood of each move position)
            # and the value (higher when the side to move is more likely
            # to win)
            p, v = evaluator.monitor.get_pv(root_id)

            # Get the move
            # action : 1-D array of size board_size**2 holding the move
            # action_index : index of the move position
            action, action_index = evaluator.get_action(
                root_id, board, turn, enemy_turn)

            # Append the current move to root_id
            if turn != enemy_turn:
                # player turn
                root_id = evaluator.player.root_id + (action_index, )
            else:
                # enemy turn
                root_id = evaluator.enemy.root_id + (action_index, )

            # Receive the board, whether the move was valid, the game
            # progress and the turn
            board, check_valid_pos, win_index, turn, _ = env.step(action)

            # Pass the information above to the web server
            game_info.game_board = board
            game_info.action_index = int(action_index)
            game_info.win_index = win_index
            game_info.curr_turn = turn  # 0 black 1 white

            # Count which move number this is
            move = np.count_nonzero(board)

            if turn == enemy_turn:
                # enemy turn
                # If the player is a human or Web agent, pass the monitor
                # agent's visit and policy to the web server
                if isinstance(evaluator.player, agents.HumanAgent) or \
                        isinstance(evaluator.player, agents.WebAgent):
                    player_agent_info.visit = evaluator.monitor.get_visit()
                    player_agent_info.p = evaluator.monitor.get_policy()
                else:
                    # Otherwise pass the player agent's visit and policy
                    player_agent_info.visit = evaluator.player.get_visit()
                    player_agent_info.p = evaluator.player.get_policy()

                # Pass move and v to the web server
                player_agent_info.add_value(move, v)
                # Original note: deletes from the enemy agent's tree every
                # subtree shorter than root_id
                evaluator.enemy.del_parents(
                    root_id)
            else:
                # player turn
                # Pass the enemy agent's visit, policy, move and v to the
                # web server
                enemy_agent_info.visit = evaluator.enemy.get_visit()
                enemy_agent_info.p = evaluator.enemy.get_policy()
                enemy_agent_info.add_value(move, v)
                # Original note: does nothing in particular for a human
                # agent
                evaluator.player.del_parents(
                    root_id)

            if win_index != 0:
                # The game has been decided
                # Reset
                player_agent_info.clear_values()
                enemy_agent_info.clear_values()

                # 0:Running 1:Player Win, 2: Enemy Win 3: Draw
                game_info.game_status = win_index  # pass the result on

                if turn == enemy_turn:
                    # It is the enemy's turn
                    if win_index == 3:
                        # Draw
                        result['Draw'] += 1
                        print('\nDraw!')
                        # Both sides score 0.5 (original note: no rating
                        # change)
                        player_elo, enemy_elo = elo(player_elo, enemy_elo,
                                                    0.5, 0.5)
                    else:
                        # The player placed the last stone, the turn then
                        # flipped and the game ended, so the player wins
                        result['Player'] += 1
                        print('\nPlayer Win!')
                        player_elo, enemy_elo = elo(player_elo, enemy_elo,
                                                    1, 0)
                else:
                    # It is the player's turn (same reasoning as above)
                    if win_index == 3:
                        result['Draw'] += 1
                        print('\nDraw!')
                        player_elo, enemy_elo = elo(player_elo, enemy_elo,
                                                    0.5, 0.5)
                    else:
                        result['Enemy'] += 1
                        print('\nEnemy Win!')
                        player_elo, enemy_elo = elo(player_elo, enemy_elo,
                                                    0, 1)

                # Render the final board
                utils.render_str(board, BOARD_SIZE, action_index)

                # Swap who moves first
                enemy_turn = abs(enemy_turn - 1)
                turn = 0

                # Pass the move order and turn to the web server
                game_info.enemy_turn = enemy_turn
                game_info.curr_turn = turn

                # Print a summary of the results so far
                pw, ew, dr = result['Player'], result['Enemy'], result['Draw']
                winrate = (pw + 0.5 * dr) / (pw + ew + dr) * 100
                print('')
                print('=' * 20, " {} Game End ".format(i + 1), '=' * 20)
                print('Player Win: {}'
                      ' Enemy Win: {}'
                      ' Draw: {}'
                      ' Winrate: {:.2f}%'.format(pw, ew, dr, winrate))
                print('Player ELO: {:.0f}, Enemy ELO: {:.0f}'.format(
                    player_elo, enemy_elo))

                evaluator.reset()  # reset the evaluator
def render_history(self):
    """Render this instance with ``history_edits.html``.

    Unlike the other renderers, newlines are stripped from the content
    (replaced by the empty string) rather than turned into ``<br>``.
    """
    flattened = self.content.replace('\n', '')
    self._render_text = flattened
    return utils.render_str("history_edits.html", p=self)
def main():
    """Play N_MATCH games between the player and enemy agents.

    Every move is mirrored into ``gi`` and the two agent-info objects;
    after each game the running score, win rate and ELO ratings are
    printed and the evaluator is reset.

    Raises:
        ValueError: if ``env.step`` reports an invalid move position.
    """
    print('cuda:', use_cuda)

    env = game.GameState('text')
    result = {'Player': 0, 'Enemy': 0, 'Draw': 0}
    turn = 0
    enemy_turn = 1
    gi.enemy_turn = enemy_turn
    player_elo = 1500
    enemy_elo = 1500

    print('Player ELO: {:.0f}, Enemy ELO: {:.0f}'.format(
        player_elo, enemy_elo))

    for i in range(N_MATCH):
        board = np.zeros([BOARD_SIZE, BOARD_SIZE])
        root_id = (0, )
        win_index = 0
        action_index = None

        print('Player Color: Black' if i % 2 == 0 else 'Player Color: White')

        # One iteration per move; the loop ends as soon as a result exists.
        while win_index == 0:
            utils.render_str(board, BOARD_SIZE, action_index)
            action, action_index = evaluator.get_action(
                root_id, board, turn, enemy_turn)
            p, v = evaluator.get_pv(root_id, turn, enemy_turn)

            # Extend the move history of whichever side just moved.
            mover = evaluator.enemy if turn == enemy_turn else evaluator.player
            root_id = mover.root_id + (action_index, )

            board, check_valid_pos, win_index, turn, _ = env.step(action)

            # WebAPI
            gi.game_board = board
            gi.action_index = int(action_index)
            gi.win_index = win_index
            gi.curr_turn = turn

            move = np.count_nonzero(board)

            if evaluator.get_player_visit() is not None:
                player_agent_info.visit = evaluator.get_player_visit()
            if evaluator.get_enemy_visit() is not None:
                enemy_agent_info.visit = evaluator.get_enemy_visit()

            # Prune the tree of the side about to move and log the value
            # for the side that just moved.
            if turn == enemy_turn:
                to_prune, info = evaluator.enemy, player_agent_info
            else:
                to_prune, info = evaluator.player, enemy_agent_info
            to_prune.del_parents(root_id)
            info.add_value(move, v)
            info.p = p

            # used for debugging
            if not check_valid_pos:
                raise ValueError('no legal move!')

        # ----- the game is over (win_index != 0) -----
        player_agent_info.clear_values()
        enemy_agent_info.clear_values()

        if win_index == 3:
            result['Draw'] += 1
            print('\nDraw!')
            player_elo, enemy_elo = elo(player_elo, enemy_elo, 0.5, 0.5)
        elif turn == enemy_turn:
            # The turn already flipped, so the player made the last move.
            result['Player'] += 1
            print('\nPlayer Win!')
            player_elo, enemy_elo = elo(player_elo, enemy_elo, 1, 0)
        else:
            result['Enemy'] += 1
            print('\nEnemy Win!')
            player_elo, enemy_elo = elo(player_elo, enemy_elo, 0, 1)

        utils.render_str(board, BOARD_SIZE, action_index)

        # Change turn
        enemy_turn = 1 - enemy_turn
        gi.enemy_turn = enemy_turn
        turn = 0

        pw, ew, dr = result['Player'], result['Enemy'], result['Draw']
        winrate = (pw + 0.5 * dr) / (pw + ew + dr) * 100
        print('')
        print('=' * 20, " {} Game End ".format(i + 1), '=' * 20)
        print('Player Win: {}'
              ' Enemy Win: {}'
              ' Draw: {}'
              ' Winrate: {:.2f}%'.format(pw, ew, dr, winrate))
        print('Player ELO: {:.0f}, Enemy ELO: {:.0f}'.format(
            player_elo, enemy_elo))
        evaluator.reset()
def render(self):
    """Render this instance with ``post.html``.

    The template also receives the number of items in ``self.likers``,
    ``self.dislikers`` and ``self.comments``.
    """
    self._render_text = self.content.replace('\n', '<br>')

    # Each collection is materialised once just to be counted.
    like_total = len(list(self.likers))
    dislike_total = len(list(self.dislikers))
    comment_total = len(list(self.comments))

    return utils.render_str("post.html",
                            p=self,
                            nlikers=like_total,
                            ndislikers=dislike_total,
                            ncomments=comment_total)
def self_play(n_selfplay):
    """Play *n_selfplay* self-play games with the global ``Agent``.

    For every move the state and the search policy ``pi`` are recorded per
    colour; once a game ends they are appended to the global ``cur_memory``
    as ``(state, pi, reward)`` tuples, with reward +1/-1/0 for
    win/loss/draw. After all games, ``rep_memory`` is extended with
    ``utils.augment_dataset(cur_memory, BOARD_SIZE)``.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed listing; spots where more than one layout is
    plausible are marked individually.
    """
    global cur_memory, rep_memory
    global Agent

    # Per-colour buffers; new samples are pushed on the left and popped
    # from the right when a game ends, so they are stored oldest-first.
    state_black = deque()
    state_white = deque()
    pi_black = deque()
    pi_white = deque()

    if RESIGN_MODE:
        resign_val_balck = []  # (sic) values seen on black's turns
        resign_val_white = []  # values seen on white's turns
        resign_val = []  # values kept for computing the threshold
        resign_v = -1.0  # resignation threshold until calibrated
        # Values are only collected during the first quarter of N_SELFPLAY.
        n_resign_thres = N_SELFPLAY // 4

    for episode in range(n_selfplay):
        if (episode + 1) % 10 == 0:
            logging.warning('Playing Episode {:3}'.format(episode + 1))

        env = game.GameState('text')
        board = np.zeros((BOARD_SIZE, BOARD_SIZE), 'float')
        turn = 0
        root_id = (0, )
        win_index = 0
        time_steps = 0
        action_index = None

        if RESIGN_MODE:
            resign_index = 0

        while win_index == 0:
            if PRINT_SELFPLAY:
                utils.render_str(board, BOARD_SIZE, action_index)

            # ====================== start MCTS ============================ #

            # tau is 1 for the first TAU_THRES moves and 0 afterwards.
            if time_steps < TAU_THRES:
                tau = 1
            else:
                tau = 0

            pi = Agent.get_pi(root_id, tau)

            # ===================== collect samples ======================== #

            state = utils.get_state_pt(root_id, BOARD_SIZE, IN_PLANES)

            if turn == 0:
                state_black.appendleft(state)
                pi_black.appendleft(pi)
            else:
                state_white.appendleft(state)
                pi_white.appendleft(pi)

            # ======================== get action ========================== #

            action, action_index = utils.get_action(pi)
            root_id += (action_index, )

            # ====================== print evaluation ====================== #

            # NOTE(review): as reconstructed, ``v`` only exists inside this
            # PRINT_SELFPLAY branch, so the resign bookkeeping below only
            # runs when printing is on; with RESIGN_MODE on and
            # PRINT_SELFPLAY off, ``resign_val`` would stay empty and the
            # later ``min(resign_val)`` would raise -- confirm against the
            # original layout.
            if PRINT_SELFPLAY:
                Agent.model.eval()
                with torch.no_grad():
                    state_input = torch.tensor([state]).to(device).float()
                    p, v = Agent.model(state_input)
                    p = p.cpu().numpy()[0]
                    v = v.item()

                    print('\nPi:\n{}'.format(
                        pi.reshape(BOARD_SIZE, BOARD_SIZE).round(decimals=2)))
                    print('\nPolicy:\n{}'.format(
                        p.reshape(BOARD_SIZE, BOARD_SIZE).round(decimals=2)))

                if turn == 0:
                    print("\nBlack's win%: {:.2f}%".format((v + 1) / 2 * 100))
                    if RESIGN_MODE:
                        if episode < n_resign_thres:
                            resign_val_balck.append(v)
                        elif v < resign_v:
                            # Black gives up, so white (2) is the winner.
                            resign_index = 2
                            if PRINT_SELFPLAY:
                                print('"Black Resign!"')
                else:
                    print("\nWhite's win%: {:.2f}%".format((v + 1) / 2 * 100))
                    if RESIGN_MODE:
                        if episode < n_resign_thres:
                            resign_val_white.append(v)
                        elif v < resign_v:
                            # White gives up, so black (1) is the winner.
                            resign_index = 1
                            if PRINT_SELFPLAY:
                                print('"White Resign!"')

            # =========================== step ============================= #

            board, _, win_index, turn, _ = env.step(action)
            time_steps += 1

            # ========================== result ============================ #

            if RESIGN_MODE:
                # A resignation overrides whatever the environment reported.
                if resign_index != 0:
                    win_index = resign_index
                    result['Resign'] += 1

            if win_index != 0:
                if win_index == 1:
                    reward_black = 1.
                    reward_white = -1.
                    result['Black'] += 1

                    if RESIGN_MODE:
                        if episode < n_resign_thres:
                            # Keep only the winner's (black's) values.
                            for val in resign_val_balck:
                                resign_val.append(val)
                            resign_val_balck.clear()
                            resign_val_white.clear()

                elif win_index == 2:
                    reward_black = -1.
                    reward_white = 1.
                    result['White'] += 1

                    if RESIGN_MODE:
                        if episode < n_resign_thres:
                            # Keep only the winner's (white's) values.
                            for val in resign_val_white:
                                resign_val.append(val)
                            resign_val_white.clear()
                            resign_val_balck.clear()
                else:
                    reward_black = 0.
                    reward_white = 0.
                    result['Draw'] += 1

                    if RESIGN_MODE:
                        if episode < n_resign_thres:
                            # On a draw both sides' values are kept.
                            for val in resign_val_balck:
                                resign_val.append(val)
                            for val in resign_val_white:
                                resign_val.append(val)
                            resign_val_balck.clear()
                            resign_val_white.clear()

                if RESIGN_MODE:
                    # At the end of the collection phase the threshold
                    # becomes the smallest value that was kept.
                    if episode + 1 == n_resign_thres:
                        resign_v = min(resign_val)
                        resign_val.clear()

                    # NOTE(review): this print may instead belong inside
                    # the ``episode + 1 == n_resign_thres`` branch; the
                    # collapsed source does not say.
                    if PRINT_SELFPLAY:
                        print('Resign win%: {:.2f}%'.format(
                            (resign_v + 1) / 2 * 100))

                # ====================== store in memory ======================= #

                # Drain both buffers alternately (black first).
                while state_black or state_white:
                    if state_black:
                        cur_memory.append(
                            (state_black.pop(), pi_black.pop(), reward_black))
                    if state_white:
                        cur_memory.append(
                            (state_white.pop(), pi_white.pop(), reward_white))

                # ========================= result =========================== #

                # NOTE(review): how much of the summary sits under
                # PRINT_SELFPLAY is ambiguous in the collapsed source;
                # everything up to the memory-size print is assumed to.
                if PRINT_SELFPLAY:
                    utils.render_str(board, BOARD_SIZE, action_index)

                    bw, ww, dr, rs = result['Black'], result['White'], \
                        result['Draw'], result['Resign']
                    print('')
                    print('=' * 20,
                          " {:3} Game End ".format(episode + 1),
                          '=' * 20)
                    print('Black Win: {:3} '
                          'White Win: {:3} '
                          'Draw: {:2} '
                          'Win%: {:.2f}%'
                          '\nResign: {:2}'.format(bw, ww, dr,
                                                  (bw + 0.5 * dr) /
                                                  (bw + ww + dr) * 100,
                                                  rs))
                    print('current memory size:', len(cur_memory))

                Agent.reset()

    rep_memory.extend(utils.augment_dataset(cur_memory, BOARD_SIZE))
def render(self):
    """Render this instance with the ``partials/comment.html`` template."""
    # Newlines in the stored content become HTML line breaks.
    html_text = self.content.replace('\n', '<br>')
    self._render_text = html_text
    return render_str("partials/comment.html", c=self)
def render(self):
    """Render this instance with the ``post.html`` template."""
    # Newlines in the stored content become HTML line breaks.
    html_text = self.content.replace('\n', '<br>')
    self._render_text = html_text
    return utils.render_str("post.html", p=self)
def render(self, logged_in_user):
    """Render this instance with ``comment.html``.

    *logged_in_user* is passed through to the template unchanged.
    """
    # Newlines in the stored content become HTML line breaks.
    html_text = self.content.replace('\n', '<br>')
    self._render_text = html_text
    return utils.render_str("comment.html",
                            c=self,
                            logged_in_user=logged_in_user)
def render_str(self, template, **params):
    """Render *template* via ``utils.render_str`` with ``user`` added.

    ``self.user`` always overrides any ``user`` keyword supplied by the
    caller.
    """
    context = dict(params)
    context['user'] = self.user
    return utils.render_str(template, **context)
def render(self):
    """Render this instance with ``content.html``.

    The content is used as-is; no newline conversion is applied.
    """
    raw_text = self.content
    self._render_text = raw_text
    return utils.render_str("content.html", p=self)
def main():
    """Play N_MATCH games between the player and enemy models.

    Builds an ``Evaluator`` from the two model paths, alternates who moves
    first every game, and prints the running score, win rate and ELO
    ratings after each game.
    """
    evaluator = Evaluator(player_model_path, enemy_model_path)
    env = evaluator.return_env()

    result = {'Player': 0, 'Enemy': 0, 'Draw': 0}
    turn = 0
    enemy_turn = 1
    player_elo = 1500
    enemy_elo = 1500

    print('Player ELO: {:.0f}, Enemy ELO: {:.0f}'.format(
        player_elo, enemy_elo))

    for i in range(N_MATCH):
        board = np.zeros([BOARD_SIZE, BOARD_SIZE])
        root_id = (0, )
        win_index = 0
        action_index = None

        print('Player Color: Black' if i % 2 == 0 else 'Player Color: White')

        # One iteration per move; the loop ends as soon as a result exists.
        while win_index == 0:
            utils.render_str(board, BOARD_SIZE, action_index)
            action, action_index = evaluator.get_action(
                root_id, board, turn, enemy_turn)

            # Extend the move history of whichever side just moved.
            mover = evaluator.enemy if turn == enemy_turn else evaluator.player
            root_id = mover.root_id + (action_index, )

            board, check_valid_pos, win_index, turn, _ = env.step(action)

            # ``turn`` now names the side to move next; prune its tree.
            to_prune = (evaluator.enemy if turn == enemy_turn
                        else evaluator.player)
            to_prune.del_parents(root_id)

        # ----- the game is over (win_index != 0) -----
        if win_index == 3:
            result['Draw'] += 1
            print('\nDraw!')
            player_elo, enemy_elo = elo(player_elo, enemy_elo, 0.5, 0.5)
        elif turn == enemy_turn:
            # The turn already flipped, so the player made the last move.
            result['Player'] += 1
            print('\nPlayer Win!')
            player_elo, enemy_elo = elo(player_elo, enemy_elo, 1, 0)
        else:
            result['Enemy'] += 1
            print('\nEnemy Win!')
            player_elo, enemy_elo = elo(player_elo, enemy_elo, 0, 1)

        utils.render_str(board, BOARD_SIZE, action_index)

        # Change turn
        enemy_turn = 1 - enemy_turn
        turn = 0

        pw, ew, dr = result['Player'], result['Enemy'], result['Draw']
        winrate = (pw + 0.5 * dr) / (pw + ew + dr) * 100
        print('')
        print('=' * 20, " {} Game End ".format(i + 1), '=' * 20)
        print('Player Win: {}'
              ' Enemy Win: {}'
              ' Draw: {}'
              ' Winrate: {:.2f}%'.format(pw, ew, dr, winrate))
        print('Player ELO: {:.0f}, Enemy ELO: {:.0f}'.format(
            player_elo, enemy_elo))
        evaluator.reset()
def main():
    """Play N_MATCH games between the player and enemy agents.

    Each move is mirrored into ``game_info`` and the two agent-info
    objects. When the player is a ``HumanAgent`` or ``WebAgent`` the
    monitor agent's visit/policy data is published in its place. After
    each game the running score, win rate and ELO ratings are printed.
    """
    evaluator.set_agents(
        player_model_path, enemy_model_path, monitor_model_path)

    player_agent_info.agent = evaluator.player
    enemy_agent_info.agent = evaluator.enemy

    env = evaluator.return_env()

    result = {'Player': 0, 'Enemy': 0, 'Draw': 0}
    turn = 0
    enemy_turn = 1
    player_elo = 1500
    enemy_elo = 1500

    game_info.enemy_turn = enemy_turn
    game_info.game_status = 0

    print('Player ELO: {:.0f}, Enemy ELO: {:.0f}'.format(
        player_elo, enemy_elo))

    for i in range(N_MATCH):
        board = np.zeros([BOARD_SIZE, BOARD_SIZE])
        root_id = (0,)
        win_index = 0
        action_index = None
        game_info.game_board = board

        print('Player Color: Black' if i % 2 == 0 else 'Player Color: White')

        # 0:Running 1:Player Win, 2: Enemy Win 3: Draw
        game_info.game_status = 0

        # One iteration per move; the loop ends as soon as a result exists.
        while win_index == 0:
            utils.render_str(board, BOARD_SIZE, action_index)

            _policy, v = evaluator.monitor.get_pv(root_id)
            action, action_index = evaluator.get_action(
                root_id, board, turn, enemy_turn)

            # Extend the move history of whichever side just moved.
            mover = evaluator.enemy if turn == enemy_turn else evaluator.player
            root_id = mover.root_id + (action_index,)

            board, check_valid_pos, win_index, turn, _ = env.step(action)

            game_info.game_board = board
            game_info.action_index = int(action_index)
            game_info.win_index = win_index
            game_info.curr_turn = turn  # 0 black 1 white

            move = np.count_nonzero(board)

            if turn == enemy_turn:
                # The player just moved; publish the monitor's statistics
                # when the player itself is a human/web agent.
                interactive = isinstance(
                    evaluator.player, (agents.HumanAgent, agents.WebAgent))
                source = evaluator.monitor if interactive else evaluator.player
                player_agent_info.visit = source.get_visit()
                player_agent_info.p = source.get_policy()
                player_agent_info.add_value(move, v)
                evaluator.enemy.del_parents(root_id)
            else:
                enemy_agent_info.visit = evaluator.enemy.get_visit()
                enemy_agent_info.p = evaluator.enemy.get_policy()
                enemy_agent_info.add_value(move, v)
                evaluator.player.del_parents(root_id)

        # ----- the game is over (win_index != 0) -----
        player_agent_info.clear_values()
        enemy_agent_info.clear_values()

        # 0:Running 1:Player Win, 2: Enemy Win 3: Draw
        game_info.game_status = win_index

        if win_index == 3:
            result['Draw'] += 1
            print('\nDraw!')
            player_elo, enemy_elo = elo(player_elo, enemy_elo, 0.5, 0.5)
        elif turn == enemy_turn:
            # The turn already flipped, so the player made the last move.
            result['Player'] += 1
            print('\nPlayer Win!')
            player_elo, enemy_elo = elo(player_elo, enemy_elo, 1, 0)
        else:
            result['Enemy'] += 1
            print('\nEnemy Win!')
            player_elo, enemy_elo = elo(player_elo, enemy_elo, 0, 1)

        utils.render_str(board, BOARD_SIZE, action_index)

        # Change turn
        enemy_turn = 1 - enemy_turn
        turn = 0
        game_info.enemy_turn = enemy_turn
        game_info.curr_turn = turn

        pw, ew, dr = result['Player'], result['Enemy'], result['Draw']
        winrate = (pw + 0.5 * dr) / (pw + ew + dr) * 100
        print('')
        print('=' * 20, " {} Game End ".format(i + 1), '=' * 20)
        print('Player Win: {}'
              ' Enemy Win: {}'
              ' Draw: {}'
              ' Winrate: {:.2f}%'.format(pw, ew, dr, winrate))
        print('Player ELO: {:.0f}, Enemy ELO: {:.0f}'.format(
            player_elo, enemy_elo))
        evaluator.reset()
def render_str(self, template, **params):
    """Render *template* with *params* by delegating to ``utils.render_str``."""
    rendered = utils.render_str(template, **params)
    return rendered
def self_play(agent, cur_memory, rank=0):
    """Play self-play games until *cur_memory* holds MEMORY_SIZE samples.

    For every move the state and the search policy ``pi`` are recorded per
    colour; when a game ends they are appended to *cur_memory* as
    ``(state, pi, reward)`` tuples, with reward +1/-1/0 for win/loss/draw.
    Board and evaluation printing only happens when ``PRINT_SELFPLAY`` is
    set and *rank* is 0.

    Returns:
        ``utils.augment_dataset(cur_memory, BOARD_SIZE)`` once
        ``len(cur_memory) >= MEMORY_SIZE`` after a finished game.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed listing; the ambiguous spot is marked.
    """
    agent.model.eval()

    # Per-colour buffers; new samples are pushed on the left and popped
    # from the right when a game ends, so they are stored oldest-first.
    state_black = deque()
    state_white = deque()
    pi_black = deque()
    pi_white = deque()
    episode = 0

    while True:
        if (episode + 1) % 10 == 0:
            logging.info('Playing Episode {:3}'.format(episode + 1))

        env = game.GameState('text')
        board = np.zeros((BOARD_SIZE, BOARD_SIZE), 'float')
        turn = 0
        root_id = (0, )
        win_index = 0
        time_steps = 0
        action_index = None

        while win_index == 0:
            if PRINT_SELFPLAY and rank == 0:
                utils.render_str(board, BOARD_SIZE, action_index)

            # ====================== start MCTS ============================ #

            # tau is 1 for the first TAU_THRES moves and 0 afterwards.
            if time_steps < TAU_THRES:
                tau = 1
            else:
                tau = 0

            pi = agent.get_pi(root_id, tau, rank)

            # ===================== collect samples ======================== #

            state = utils.get_state_pt(root_id, BOARD_SIZE, IN_PLANES)

            if turn == 0:
                state_black.appendleft(state)
                pi_black.appendleft(pi)
            else:
                state_white.appendleft(state)
                pi_white.appendleft(pi)

            # ======================== get action ========================== #

            action, action_index = utils.get_action(pi)
            root_id += (action_index, )

            # ====================== print evaluation ====================== #

            if PRINT_SELFPLAY and rank == 0:
                with torch.no_grad():
                    state_input = torch.tensor([state]).to(device).float()
                    p, v = agent.model(state_input)
                    p = p.cpu().numpy()[0]
                    v = v.item()

                    print('\nPi:\n{}'.format(
                        pi.reshape(BOARD_SIZE, BOARD_SIZE).round(decimals=2)))
                    print('\nPolicy:\n{}'.format(
                        p.reshape(BOARD_SIZE, BOARD_SIZE).round(decimals=2)))

                if turn == 0:
                    print("\nBlack's win%: {:.2f}%".format((v + 1) / 2 * 100))
                else:
                    print("\nWhite's win%: {:.2f}%".format((v + 1) / 2 * 100))

            # =========================== step ============================= #

            board, _, win_index, turn, _ = env.step(action)
            time_steps += 1

            # ========================== result ============================ #

            if win_index != 0:
                # ``result`` is not a parameter; it is read from an
                # enclosing/module scope.
                if win_index == 1:
                    reward_black = 1.
                    reward_white = -1.
                    result['Black'] += 1
                elif win_index == 2:
                    reward_black = -1.
                    reward_white = 1.
                    result['White'] += 1
                else:
                    reward_black = 0.
                    reward_white = 0.
                    result['Draw'] += 1

                # ====================== store in memory ======================= #

                # Drain both buffers alternately (black first).
                while state_black or state_white:
                    if state_black:
                        cur_memory.append(
                            (state_black.pop(), pi_black.pop(), reward_black))
                    if state_white:
                        cur_memory.append(
                            (state_white.pop(), pi_white.pop(), reward_white))

                # ========================= result =========================== #

                # NOTE(review): how much of the summary sits under this
                # condition is ambiguous in the collapsed source;
                # everything up to the memory-size print is assumed to.
                if PRINT_SELFPLAY and rank == 0:
                    utils.render_str(board, BOARD_SIZE, action_index)

                    bw, ww, dr = result['Black'], result['White'], \
                        result['Draw']
                    print('')
                    print('=' * 20,
                          " {:3} Game End ".format(episode + 1),
                          '=' * 20)
                    print('Black Win: {:3} '
                          'White Win: {:3} '
                          'Draw: {:2} '
                          'Win%: {:.2f}%'.format(bw, ww, dr,
                                                 (bw + 0.5 * dr) /
                                                 (bw + ww + dr) * 100))
                    print('current memory size:', len(cur_memory))

                episode += 1
                agent.reset()

                # Stop once enough samples exist and return the result of
                # utils.augment_dataset.
                if len(cur_memory) >= MEMORY_SIZE:
                    return utils.augment_dataset(cur_memory, BOARD_SIZE)