예제 #1
0
    def rtn_q(self, ban, model, player_side, action):
        """Estimate the value of playing ``action`` for ``player_side``.

        The move is tried on a deep copy of ``ban`` and followed by a single
        opponent reply: a reply that wins on the spot if one exists,
        otherwise the available square the model ranks highest for the
        opponent.

        Args:
            ban: Board object. It is deep-copied; the original is not touched.
            model: Callable returning ``(policy, _)`` for a network input.
            player_side: Side making ``action``; the opponent is
                ``1 - player_side``.
            action: Indexable ``(row, col)`` of the move to evaluate.

        Returns:
            1 if ``action`` wins immediately, -1 if the opponent's reply
            wins, 0 if the board fills up after either move, otherwise the
            maximum of the model's policy output for ``player_side`` plus
            the array returned by ``rtn_put_available_position()``
            (presumably a mask over playable squares -- verify against the
            board class).
        """
        board = copy.deepcopy(ban)
        board.ban_applay(player_side, action[0], action[1])

        if board.ban_win(player_side, action[0], action[1]):
            return 1
        if board.ban_fill():
            return 0

        opponent = 1 - player_side
        state = chg_input_cnn(board, opponent)
        policy, _ = model(state.to(self.device))
        policy = policy.detach().cpu().numpy()[0]
        ranked = np.argsort(policy)[::-1]  # best-rated index first

        candidates = board.ban_put_available()

        # An immediately winning reply overrides the policy ranking.
        winning_reply_found = False
        for r_op, c_op in candidates:
            trial = copy.deepcopy(board)
            trial.ban_applay(opponent, r_op, c_op)
            if trial.ban_win(opponent, r_op, c_op):
                winning_reply_found = True
                # Stop here so (r_op, c_op) stays on the winning square;
                # scanning on would leave it on the last available square.
                break

        if not winning_reply_found:
            for index in ranked:
                r_op, c_op = index2rc(index)
                if [r_op, c_op] in candidates:
                    break

        board.ban_applay(opponent, r_op, c_op)
        if board.ban_win(opponent, r_op, c_op):
            return -1
        if board.ban_fill():
            return 0

        # Nobody has won yet: score the position by the model's best
        # follow-up for player_side.
        state = chg_input_cnn(board, player_side)
        policy, _ = model(state.to(self.device))
        policy = policy.detach().cpu().numpy()[0]
        return np.max(policy + board.rtn_put_available_position())
예제 #2
0
def check_win_rate_random_ai_first(Env, brain, model, max_episode):
    """Measure how ``model`` (side 1, moving first) fares against random play.

    Every episode resets the board. Side 1 chooses its move through
    ``decide_action_func`` and side 0 answers with ``random.choice`` over
    the available squares, until one side wins or the board is full.

    Args:
        Env: Board class, instantiated as ``Env(BANHEN, WINREN)``.
        brain: Unused.
        model: Network handed to ``decide_action_func``.
        max_episode: Number of games to play (also the denominator of the
            returned rates).

    Returns:
        ``(win_rate, not_lose_rate)`` in percent: side 1's wins, and side
        1's wins plus draws, each divided by ``max_episode``.
    """
    ai_side, random_side = 1, 0
    tally = {ai_side: 0, random_side: 0, "draw": 0}
    board = Env(BANHEN, WINREN)

    def place(side, move):
        """Play ``move`` for ``side``; record the result and report game over."""
        row, col = move[0], move[1]
        board.ban_applay(side, row, col)
        if board.ban_win(side, row, col):
            tally[side] += 1
            return True
        if board.ban_fill():
            tally["draw"] += 1
            return True
        return False

    for episode in range(max_episode):
        print("\rstep : {0}/{1} ".format(episode, max_episode), end="")
        board.ban_reset()
        while True:
            # Model's turn.
            state = chg_input_cnn(board, ai_side)
            move, _ = decide_action_func(model, board, state)
            if place(ai_side, move):
                break

            # Random opponent's turn.
            if place(random_side, random.choice(board.ban_put_available())):
                break

    ai_wins = tally[ai_side]
    win_rate = 100 * ai_wins / (max_episode)
    not_lose_rate = 100 * (ai_wins + tally["draw"]) / (max_episode)
    return win_rate, not_lose_rate
예제 #3
0
def check_win_rate_put_1st(Env, brain, model, max_episode):
    """Play ``model`` (side 1) against a "first available square" opponent.

    Side 0 moves first and always takes the first entry of
    ``ban_put_available()``; side 1 answers through ``decide_action_func``.
    A single game is played.

    Args:
        Env: Board class, instantiated as ``Env(BANHEN, WINREN)``.
        brain: Unused.
        model: Network handed to ``decide_action_func``.
        max_episode: Accepted for interface compatibility. The rate is
            computed over the games actually played, so it can reach 100
            whatever value is passed here.

    Returns:
        Percentage of played games that ended on side 1's move, i.e. side 1
        won or its move filled the board. Games ending on side 0's move
        (side 0 won or its move filled the board) count against side 1.
    """
    opponent_side, ai_side = 0, 1
    games_played = 0
    ended_on_ai_move = 0
    board = Env(BANHEN, WINREN)

    for _ in range(1):
        games_played += 1
        board.ban_reset()
        while True:
            # Scripted opponent: lowest-ordered available square.
            move = board.ban_put_available()[0]
            board.ban_applay(opponent_side, move[0], move[1])
            if board.ban_win(opponent_side, move[0], move[1]) or board.ban_fill():
                break

            # Model's turn.
            state = chg_input_cnn(board, ai_side)
            move, _ = decide_action_func(model, board, state)
            board.ban_applay(ai_side, move[0], move[1])
            if board.ban_win(ai_side, move[0], move[1]) or board.ban_fill():
                ended_on_ai_move += 1
                break

    # Divide by the games actually played rather than by max_episode, which
    # does not control how many games are run.
    return 100 * ended_on_ai_move / games_played
예제 #4
0
step = 0  # current move number within a game
step_sum = 0
gen_num = 0  # initial value for the model
episode_sum = 0  # cumulative number of episodes
search_depth = 3
ep_random_data = 0

log_print("lrはtextファイルから読み取り")

log_print('start : ' + model_filename)
start_time = datetime.datetime.now()
log_print("start time")
log_print(start_time)

#print(brain.main_model)
# Sample network input built from the board for side 0; passed to add_graph below.
dummy_input = chg_input_cnn(ban, 0)
#print(dummy_input.size())

#dummy_model =  nn.DataParallel(NeuralNet_cnn(BANHEN, BANSIZE)).to(device)
#writer_x.add_graph(dummy_model)

model = NeuralNet_cnn(BANHEN, BANSIZE)
writer_x.add_graph(model, (dummy_input, ), verbose=True)

if __name__ == '__main__':
    while train_is_continue:
        for episode in range(NUM_EPISODES):  # repeat for the maximum number of episodes
            episode_sum += 1
            ban.ban_reset()
            step = 0  # reset the move counter
            terminal = False  # reset the terminal flag
예제 #5
0
def check_win_rate_ai(Env, brain, main_model, new_model, max_episode):
    """Pit ``new_model`` against ``main_model`` and report the new model's win share.

    Each episode consists of two games so that each model moves first once.
    In every game the opening move of both sides is chosen with
    ``random.choice`` over the available squares; afterwards each side's
    move comes from ``decide_action_func`` with that side's model.

    Args:
        Env: Board class, instantiated as ``Env(BANHEN, WINREN)``.
        brain: Unused.
        main_model: Current reference network.
        new_model: Candidate network being evaluated.
        max_episode: Number of episodes (two games each).

    Returns:
        Percentage of decisive (non-drawn) games won by ``new_model``.
        Returns 0.0 when there were no decisive games (all draws, or
        ``max_episode`` is 0) instead of dividing by zero.
    """
    board = Env(BANHEN, WINREN)

    def play_game(models):
        """Play one game where ``models[side]`` moves for ``side``.

        Returns the winning side (0 or 1), or None when the board fills up.
        """
        board.ban_reset()
        step = 0
        while True:
            step += 1
            for side in (0, 1):
                state = chg_input_cnn(board, side)
                if step <= 1:  # opening move of each side is random
                    move = random.choice(board.ban_put_available())
                else:
                    move, _ = decide_action_func(models[side], board, state)

                board.ban_applay(side, move[0], move[1])
                if board.ban_win(side, move[0], move[1]):
                    return side
                if board.ban_fill():
                    return None

    main = ("main", main_model)
    new = ("new", new_model)
    wins = {"main": 0, "new": 0}

    for episode in range(max_episode):
        print("\rstep : {0}/{1} ".format(episode, max_episode), end="")
        # First game: main_model moves first; second game: new_model does.
        for lineup in ((main, new), (new, main)):
            winner_side = play_game([net for _, net in lineup])
            if winner_side is not None:
                wins[lineup[winner_side][0]] += 1

    decisive_games = wins["main"] + wins["new"]
    if decisive_games == 0:
        # Nothing but draws (or no games): the new model won none of them.
        return 0.0
    return 100 * wins["new"] / decisive_games
예제 #6
0
    def decide_action(self, ban, model, player_side, search_depth, step, episode_sum, ep_random_data, fastmode=False):
        """Choose a move for ``player_side`` with an epsilon-greedy policy.

        The exploration threshold decays exponentially with ``episode_sum``
        (from ``EPS_START`` towards ``EPS_END`` at rate ``EPS_DECAY``) and is
        stored on ``self.eps_threshold``. Above the threshold the move comes
        from ``self.rtn_reward``; otherwise a random available square is
        played.

        Args:
            ban: Board object; it is only deep-copied here, never modified.
            model: Network forwarded to ``self.rtn_reward``.
            player_side: Side to move.
            search_depth: Unused.
            step: Unused.
            episode_sum: Cumulative episode count driving the epsilon decay.
            ep_random_data: Unused.
            fastmode: Debugging aid. When true, no network is used and a
                random move is returned.

        Returns:
            ``(reward, r, c, state, terminal)``, where ``state`` is the
            network input for the position before the move. In ``fastmode``
            the tuple has six items, ``(0, r, c, None, None, terminal)``,
            and ``terminal`` only reflects a full board.
        """
        if fastmode:
            action = random.choice(ban.ban_put_available())
            r, c = action[0], action[1]

            ban_copy = copy.deepcopy(ban)
            ban_copy.ban_applay(player_side, r, c)
            terminal = bool(ban_copy.ban_fill())
            return 0, r, c, None, None, terminal

        sample = random.random()
        eps_threshold = EPS_END + (EPS_START - EPS_END) * math.exp(-1. * episode_sum / EPS_DECAY)
        self.eps_threshold = eps_threshold

        if sample > eps_threshold:
            # Exploit: let the model-guided search pick the move.
            reward, r, c, state, terminal = self.rtn_reward(ban, model, player_side)
            return reward, r, c, state, terminal

        # Explore: play a random available square.
        action = random.choice(ban.ban_put_available())
        r, c = action[0], action[1]

        ban_copy = copy.deepcopy(ban)
        state = chg_input_cnn(ban_copy, player_side)  # encoded before the move is applied
        ban_copy.ban_applay(player_side, r, c)

        # Test for the win before the full board so that a winning move onto
        # the last empty square is rewarded (same order as rtn_reward).
        if ban_copy.ban_win(player_side, r, c):
            return 1, r, c, state, True
        if ban_copy.ban_fill():
            return 0, r, c, state, True
        return 0, r, c, state, False
예제 #7
0
    def rtn_reward(self, ban, model, player_side):
        """Pick a move for ``player_side`` and report its immediate outcome.

        The model's policy output for the current position is refined by
        ``random_search_value`` sampled moves: each sampled move is scored
        with ``self.rtn_q`` and blended into the policy entry for that
        square. The best-rated available square is then played on a copy of
        the board.

        Args:
            ban: Board object; it is deep-copied and not modified here.
            model: Callable returning ``(policy, _)`` for a network input.
            player_side: Side to move.

        Returns:
            ``(reward, r, c, state, terminal)``: reward is 1 for a winning
            move and 0 otherwise; ``terminal`` is True on a win or a full
            board; ``state`` is the network input for the position before
            the move.
        """
        board = copy.deepcopy(ban)

        state = chg_input_cnn(board, player_side)
        policy, _ = model(state.to(self.device))
        policy = policy.detach().cpu().numpy()[0]
        candidates = board.ban_put_available()

        blend = 0.99  # weight of the sampled q value against the old policy entry
        for i in range(random_search_value):
            candidate_scores = [policy[rc2index(r, c)] for r, c in candidates]

            # NOTE(review): these softmax weights are computed but never
            # used -- the draw below is uniform over the candidates.
            w = self.softmax_numpy(candidate_scores, 1/(1+3*i))

            action = random.choices(candidates)[0]
            q = self.rtn_q(ban, model, player_side, action)
            index = rc2index(action[0], action[1])
            policy[index] = q*blend + policy[index]*(1-blend)

        # Walk the squares from best to worst rating and take the first one
        # that is actually playable.
        for index in np.argsort(policy)[::-1]:
            r, c = index2rc(index)
            if [r, c] in candidates:
                break

        board.ban_applay(player_side, r, c)

        if board.ban_win(player_side, r, c):
            return 1, r, c, state, True
        if board.ban_fill():
            return 0, r, c, state, True
        return 0, r, c, state, False