예제 #1
0
 def opponent(self):
     '''
     Return a mark object for the opposing side of the wrapped mark.

     The value returned by the wrapped mark's opponent() is compared with
     1, 0 and -1 and mapped to a new Maru, Empty or Batsu instance.
     return:
         Maru() for 1, Empty() for 0, Batsu() for -1,
         None for any other value
     '''
     # Ask the wrapped mark once instead of once per comparison.
     opponent_value = self.mark.opponent()
     if opponent_value == 1:
         return Maru()
     if opponent_value == 0:
         return Empty()
     if opponent_value == -1:
         return Batsu()
     # Unrecognised value: keep the historical fall-through result.
     return None
 def is_draw(self):
     '''
     Tell whether the board is in a drawn position.

     A draw requires that no valid action remains and that is_win is
     false for both Maru and Batsu.
     return:
         True: draw
         False: not a draw (moves remain, or one side has won)
     '''
     # Any remaining move rules out a draw before the win checks run.
     if len(self.get_valid_actions()) != 0:
         return False
     if self.is_win(Mark(Maru())):
         return False
     return not self.is_win(Mark(Batsu()))
 def is_end(self):
     '''
     Tell whether the game is over on this board.

     return:
         True: finished (is_win holds for Maru or Batsu, or no valid
               action is left)
         False: not finished
     '''
     # Win checks come first, in the same order as before: Maru, then Batsu.
     if self.is_win(Mark(Maru())):
         return True
     if self.is_win(Mark(Batsu())):
         return True
     # Nobody has won: the game is over only when the board has no move left.
     return len(self.get_valid_actions()) == 0
예제 #4
0
                # 勝者の報酬
                current_player.learn(1, True)
                # 敗者の報酬
                self.players[result.opponent().to_int()].learn(-1, True)
                if verbose:
                    print("%s" % (state.to_array()))
                    print("-" * 5)
                    state.output()
                    print("-" * 5)
                    print("%s win!!!" %
                          (self.players[current_player_mark].mark.to_string()))
                break
            elif state.is_draw():
                result = Mark(Empty())
                for player in self.players.itervalues():
                    player.learn(0, True)
                if verbose:
                    state.output()
                    print("draw.")
                break
            current_player_mark = self.players[
                current_player_mark].mark.opponent().to_int()
            #print("="*30)


if __name__ == '__main__':
    # Manual smoke run: play one verbose game, then poke at State by hand.
    player1 = Player(Mark(Maru()))
    player2 = Player(Mark(Batsu()))
    game = Game(player1, player2)
    game.start(verbose=True)

    # NOTE(review): `new_state` and `state1` are read here but no assignment
    # to them is visible above this point - confirm where they are defined.
    print(new_state.get(0).to_int())
    print(state1.to_array())
    print(new_state.to_array())
    print(state1.win_state)
    print(state1.is_win(Mark(Maru())))

    # Two independently built Maru marks should report the same integer.
    maru1 = Mark(Maru())
    maru2 = Mark(Maru())
    print(maru1.to_int() == maru2.to_int())

    # Place Maru on cells 1 and 2, then query the win/draw/end predicates.
    new_state2 = new_state.set(1, Mark(Maru()))
    new_state3 = new_state2.set(2, Mark(Maru()))
    print(new_state3.to_array())
    print(new_state3.is_win(Mark(Maru())))
    print(new_state2.is_draw())
    print(new_state2.is_end())
    print(new_state3.is_end())

    # Fill cells 0..8 one mark at a time and check the draw predicate.
    new_state = state1.set(0, Mark(Batsu()))
    new_state2 = new_state.set(1, Mark(Maru()))
    new_state4 = new_state2.set(2, Mark(Batsu()))
    new_state5 = new_state4.set(3, Mark(Maru()))
    new_state6 = new_state5.set(4, Mark(Batsu()))
    new_state7 = new_state6.set(5, Mark(Batsu()))
    new_state8 = new_state7.set(6, Mark(Maru()))
    new_state9 = new_state8.set(7, Mark(Batsu()))
    new_state10 = new_state9.set(8, Mark(Maru()))
    print(new_state10.to_array())
    print(new_state10.is_draw())

    new_state10.output()

    state11 = State()
    state12 = State()
예제 #6
0
            状態
            報酬
            事後状態から行動を行った後の状態
        '''
        if next_state is None:
            next_state_value = 0.0
        else:
            #状態価値テーブルから価値を取得
            next_state_value = self.value[next_state]
        # 行動価値の更新式
        self.value[state] += self.step_size * (reward + next_state_value -
                                               self.value[state])
        #print("value[state] %f", self.value[state])


if __name__ == '__main__':
    # Manual check of the value table: apply a few updates, then print the
    # action chosen for Maru from the empty state.
    value = Value()
    state = State()
    # print(value.get_value(state))
    # print(value.get_max_action(state, Mark(Maru())))

    # Successor states used as keys in the updates below.
    new_state = state.set(3, Mark(Maru()))
    new_state1 = state.set(4, Mark(Maru()))
    new_state2 = new_state.set(2, Mark(Batsu()))

    value.update(state, 10, new_state)
    # The same (state, reward, next_state) triple is applied twice on purpose.
    value.update(new_state, 10, new_state2)
    value.update(new_state, 10, new_state2)
    value.update(new_state1, 10, new_state2)
    value.update(new_state1, 100, new_state2)
    # value.update(state, -3, new_state2)
    print(value.get_max_action(state, Mark(Maru())))
from maru_mark import Maru
from batsu_mark import Batsu
from empty_mark import Empty

from tic_tac_toe_state import State

from tic_tac_toe_game import Game

# Action-value table shared by both computer players
value = Value()

print("Sarsa method:")
print("Training com1 and com2.")

com_1 = SarsaCom(Mark(Maru()), value)
com_2 = SarsaCom(Mark(Batsu()), value)

print("Input the number of iterations:")

# Initial value only; the prompt loop exits solely after overwriting it.
iterations = 10000
while True:
    input_line = raw_input()
    if not input_line.isdigit():
        # Anything that is not all digits: ask again.
        print("Input number:")
        continue
    iterations = int(input_line)
    break

# Self-play training: one fresh game per iteration.
for i in xrange(iterations):
    game = Game(com_1, com_2)
    game.start()
예제 #8
0
#encoding: utf-8
'''
tic tac toe のSarsa λ実行クラス
'''
from tic_tac_toe_sarsa_r_com import SarsaRCom
from tic_tac_toe_nn_com import SarsaNNCom

from mark import Mark
from maru_mark import Maru
from batsu_mark import Batsu
from tic_tac_toe_game import Game

import dill

# The three numeric arguments are passed positionally; their meaning is
# defined by the com classes, not here.
com_1 = SarsaNNCom(Mark(Maru()), 0.1, 0.1, 0.6)
com_2 = SarsaRCom(Mark(Batsu()), 0.1, 0.1, 0.6)

iterations = 100000
print("Input the number of iterations (%d):" % iterations)
while True:
    input_line = raw_input()
    if input_line == '':
        # Empty input keeps the default shown in the prompt.
        break
    if input_line.isdigit():
        iterations = int(input_line)
        break
    print("Input number:")
# Training
for i in xrange(iterations):
    # NOTE(review): the loop body was missing in the reviewed source, which
    # left the file unparseable. One self-play game per iteration follows the
    # `Game(com_1, com_2)` / `game.start()` training loop used elsewhere in
    # this source - confirm this matches the intended training step.
    game = Game(com_1, com_2)
    game.start()

# Load the models
# NOTE(review): no code in view writes these .pkl files - confirm that the
# save step exists. dill.load can execute arbitrary code embedded in the
# file, so only load pickles produced by a trusted run.
with open('tic_tac_toe_com_1_sarsa_r.pkl', 'rb') as f:
    com_1 = dill.load(f)

with open('tic_tac_toe_com_2_sarsa_r.pkl', 'rb') as f:
    com_2 = dill.load(f)

# Interactive menu: keep offering matches until the user enters 'q'.
# NOTE(review): `Player` is used below but is not imported in the visible
# import block - confirm it is in scope.
while True:
    print("Select a type of fight [1, 2, 3, q]")
    print("1: human vs com2")
    print("2: com1 vs human")
    print("3: com1 vs com2")
    print("q: quit")

    input_line = raw_input()
    if input_line == 'q':
        break
    if not input_line.isdigit():
        # Not a menu number: show the menu again.
        continue
    type_of_fight = int(input_line)

    if type_of_fight == 1:
        game = Game(Player(Mark(Maru())), com_2)
    elif type_of_fight == 2:
        game = Game(com_1, Player(Mark(Batsu())))
    elif type_of_fight == 3:
        game = Game(com_1, com_2)
    else:
        # A number outside 1-3 used to reach game.start() with `game` either
        # unbound (first pass) or left over from the previous match;
        # re-prompt instead.
        continue
    game.start(True)
예제 #10
0
    def opponent(self):
        '''
        Return a mark object for the opposing side of the wrapped mark.

        The value returned by the wrapped mark's opponent() is compared with
        1, 0 and -1 and mapped to a new Maru, Empty or Batsu instance.
        return:
            Maru() for 1, Empty() for 0, Batsu() for -1,
            None for any other value
        '''
        # Ask the wrapped mark once instead of once per comparison.
        opponent_value = self.mark.opponent()
        if opponent_value == 1:
            return Maru()
        if opponent_value == 0:
            return Empty()
        if opponent_value == -1:
            return Batsu()
        # Unrecognised value: keep the historical fall-through result.
        return None

    def to_int(self):
        '''
        Return whatever the wrapped mark's to_int() reports.
        '''
        wrapped = self.mark
        return wrapped.to_int()

    def to_string(self):
        '''
        Return whatever the wrapped mark's to_string() reports.
        '''
        wrapped = self.mark
        return wrapped.to_string()


if __name__ == '__main__':
    # Manual demo: for each kind of mark, print its string form, its integer
    # form, and the string form of its opponent.
    maru = Mark(Maru())
    print(maru.to_string())
    print(maru.to_int())
    print(maru.opponent().to_string())
    print('=' * 10)

    batsu = Mark(Batsu())
    print(batsu.to_string())
    print(batsu.to_int())
    print(batsu.opponent().to_string())
    print('=' * 10)

    empty = Mark(Empty())
    print(empty.to_string())
    print(empty.to_int())
    print(empty.opponent().to_string())