def random_to_random(num_games=10000):
    """Play random-vs-random games and return first player's score as a percentage.

    Every game starts from ``Position(empty_board, 'A', 0, -1)``.  Both sides
    choose moves with ``pos.pick_move()``.  A game is scored for the side that
    moves first as follows:

    * 1 point   - ``reward()`` is non-zero right after the first side's move;
    * 0 points  - ``reward()`` is non-zero right after the second side's move;
    * 0.5 point - ``pick_move()`` fails for either side, or the game reaches
      ``N * N`` full rounds without a non-zero reward.

    Args:
        num_games: Number of games to play (default 10000, the value that
            used to be hard-coded).  Must be positive.

    Returns:
        ``100 * score / num_games`` as a float, i.e. a value in [0, 100].
        With the default ``num_games`` this equals the historical
        ``win_num / 100.0``.
        NOTE(review): ``evaluate_random`` in this file returns a 0..1
        fraction instead - confirm which scale callers expect.

    Raises:
        ValueError: If ``num_games`` is not positive.
    """
    if num_games <= 0:
        raise ValueError("num_games must be positive")

    max_steps = N * N
    win_num = 0.0
    for _ in range(num_games):
        pos = Position(empty_board, 'A', 0, -1)
        step = 0
        while True:
            # First side to move.
            try:
                index = pos.pick_move()[0]
            except Exception:
                # The type raised by pick_move() is not visible from here, so
                # the broad catch is kept; a failed pick counts as a draw.
                win_num += 0.5
                break
            pos = pos.move(index)
            if pos.reward() != 0:
                win_num += 1
                break

            # Second side to move.
            try:
                index2 = pos.pick_move()[0]
            except Exception:
                win_num += 0.5
                break
            pos = pos.move(index2)
            if pos.reward() != 0:
                # Second side won: nothing is added to the first side's score.
                break

            step += 1
            if step >= max_steps:
                # Step cap reached: score a draw and stop this game.  Without
                # the break the loop kept running and added another 0.5 on
                # every subsequent round.
                win_num += 0.5
                # NOTE(review): the file had lost its indentation; show() is
                # assumed to belong to this branch - confirm.
                pos.show()
                break
    return 100.0 * win_num / num_games
def _best_unmasked_index(dis, board, n_cells):
    """Return the arg-max index of *dis*, skipping occupied board cells.

    Works on a flattened float copy of *dis*, so the caller's array is not
    modified and a leading batch axis of size 1 is tolerated.  Occupied cells
    (``board[i] != '.'``) are masked with ``-inf`` rather than ``0.0`` so that
    a masked cell can never tie with an unmasked zero score and be picked
    again.  Indices ``>= n_cells`` are returned as-is.  If every entry ends up
    masked, ``n_cells`` is returned so the caller falls back to another policy.
    """
    scores = np.array(dis, dtype=float).ravel()
    # At most one entry is masked per iteration, so this bound guarantees
    # termination.
    for _ in range(scores.size):
        index = int(np.argmax(scores))
        if index >= n_cells or board[index] == '.':
            return index
        scores[index] = -np.inf
    return n_cells


def evaluate_random(net, num_games=100):
    """Play *net* (moving first) against a random opponent and return its score rate.

    Every game starts from ``Position(empty_board, 'A', 0, -1)``.  On the
    first side's turn the position is encoded with ``encode_position``, given
    a leading axis, and passed to ``net.predict_distribution``; the
    highest-scoring empty cell is played.  If the chosen index equals
    ``N * N`` (presumably a "pass"/no-move output - confirm), a move from
    ``pos.pick_move()`` is used instead.  The opponent always plays
    ``pos.pick_move()``.

    Scoring for the network's side per game:

    * 1 point   - ``reward()`` is non-zero right after the network's move;
    * 0 points  - ``reward()`` is non-zero right after the opponent's move;
    * 0.5 point - no playable move is available (``pick_move()`` fails or the
      selected cell is occupied), or ``N * N`` full rounds pass without a
      non-zero reward.

    Args:
        net: Object exposing ``predict_distribution(batch)``.
        num_games: Number of games to play (default 100, the value that used
            to be hard-coded).  Must be positive.

    Returns:
        ``score / num_games`` as a float in [0, 1].

    Raises:
        ValueError: If ``num_games`` is not positive.
    """
    if num_games <= 0:
        raise ValueError("num_games must be positive")

    n_cells = N * N
    win_num = 0.0
    for _ in range(num_games):
        pos = Position(empty_board, 'A', 0, -1)
        step = 0
        while True:
            # Network's move.
            net_input = np.expand_dims(encode_position(pos), axis=0)
            dis = net.predict_distribution(net_input)
            index = _best_unmasked_index(dis, pos.board, n_cells)
            if index == n_cells:
                try:
                    index = pos.pick_move()[0]
                except Exception:
                    # Same handling as the opponent's pick below: a failed
                    # pick counts as a draw instead of aborting the whole
                    # evaluation.
                    win_num += 0.5
                    break
            if pos.board[index] != '.':
                win_num += 0.5
                break
            pos = pos.move(index)
            if pos.reward() != 0:
                win_num += 1
                break

            # Random opponent's move.
            try:
                index2 = pos.pick_move()[0]
            except Exception:
                # The type raised by pick_move() is not visible from here, so
                # the broad catch is kept.
                win_num += 0.5
                break
            pos = pos.move(index2)
            if pos.reward() != 0:
                # Opponent won: nothing is added to the network's score.
                break

            step += 1
            if step >= n_cells:
                # Step cap reached: score a draw and stop this game.  Without
                # the break the loop kept running and added another 0.5 on
                # every subsequent round.
                win_num += 0.5
                # NOTE(review): the file had lost its indentation; show() is
                # assumed to belong to this branch - confirm.
                pos.show()
                break
    return win_num / float(num_games)