Example #1
0
 def __init__(self) -> None:
     """Initialise the parent class, create the game environment and build the UI.

     Attributes set here:
         env: a fresh ``Env.OthelloEnv`` instance.
         i:   integer starting at 0 (presumably a move/step counter; its use
              is not visible in this block -- confirm against the callers).
         Q:   reference to ``test.Q`` (presumably a learned Q-table exported
              by the ``test`` module -- TODO confirm).
     """
     super().__init__()
     self.env = Env.OthelloEnv()
     # NOTE(review): initUI() runs before self.i and self.Q exist; if initUI
     # (or anything it triggers) reads them, this ordering will raise
     # AttributeError -- verify.
     self.initUI()
     self.i = 0
     self.Q = test.Q
Example #2
0
def f(state, action):
    """Apply ``action`` to ``state`` on a throw-away environment.

    A new ``Env.OthelloEnv`` is created, its ``state`` attribute is replaced
    by the given ``state``, and a single ``_step(action)`` is executed.

    Returns:
        A ``(next_state, done)`` pair; the reward produced by the step is
        discarded.
    """
    scratch_env = Env.OthelloEnv()
    scratch_env.state = state
    next_state, _reward, done = scratch_env._step(action)
    return next_state, done
Example #3
0
def main():
    """Build a per-square value table and benchmark a greedy policy with it.

    Steps:
      1. Obtain ``Value`` from ``mc_prediction(random_policy, env,
         num_episodes=10)`` and force the four centre squares (27, 28, 35,
         36 in row-major 8x8 indexing) to 1.
      2. Print the 64 values as an 8x8 matrix.
      3. Play games in which the side with ``flag == 1`` picks the legal
         move with the highest table value and the side with ``flag == -1``
         uses ``random_policy``; print the fraction of 1000 games whose
         final board sums to a positive number.
    """
    num_games = 1000

    env = Env.OthelloEnv()
    Value = mc_prediction(random_policy, env, num_episodes=10)
    Value[27] = 1
    Value[28] = 1
    Value[35] = 1
    Value[36] = 1
    value_list = [Value[i] for i in range(64)]
    # Alternative: an existing, experience-based weight matrix.
    # value_list = [10, -9, 8, 4, 4, 8, -9, 10,
    #               -9, -9, -4, -3, -3, -4, -9, -9,
    #               8, -4, 8, 2, 2, 8, -4, 8,
    #               4, 3, 2, 1, 1, 2, 3, 4,
    #               4, 3, 2, 1, 1, 2, 3, 4,
    #               8, -4, 8, 2, 2, 8, -4, 8,
    #               -9, -9, -4, -3, -3, -4, -9, -9,
    #               10, -9, 8, 4, 4, 8, -9, 10]

    # Print the value table as an 8x8 matrix: one text line per board row,
    # followed by a blank line. (The newline used to be emitted after every
    # single value, which destroyed the matrix layout.)
    for row in range(8):
        for col in range(8):
            print(value_list[8 * row + col], end=' ')
        print()
    print()

    # Start playing.
    def mid_policy(state, flag):
        """Return ``[flag, move]`` for the highest-valued legal move.

        Returns ``[0, 0]`` when ``judge`` reports no legal move. Ties are
        resolved in favour of the earliest move in ``judge``'s result.
        """
        legal_moves = judge(state, flag)
        if len(legal_moves) == 0:
            return [0, 0]
        # Score each candidate by the value of the square it occupies, not
        # by its position inside the candidate list.
        # Assumes judge() yields board indices 0..63 (consistent with the
        # 64-entry table and the [flag, square] action format) -- TODO confirm.
        best_move = max(legal_moves, key=lambda move: value_list[move])
        return [flag, best_move]

    def play_game(env):
        """Play one game; return 1 if the final board sum is positive, else 0."""
        s = env._reset()
        flag = 1
        action = mid_policy(s, flag)
        d = False
        while not d:
            s, r, d = env._step(action)
            # Alternate sides: flag == 1 plays greedily, flag == -1 randomly.
            flag = -flag
            if flag == 1:
                action = mid_policy(s, flag)
            else:
                action = random_policy(s, flag)
        total = sum(s[k] for k in range(64))
        return 1 if total > 0 else 0

    env = Env.OthelloEnv()
    # One extra game whose result is discarded (kept from the original flow).
    play_game(env)
    win = 0
    for _ in range(num_games):
        win += play_game(env)
    print(win / num_games)
Example #4
0
import sys
import random

from collections import defaultdict
import Othello as Env
from Othello import judge


def random_policy(state, flag):
    """Pick a uniformly random move among those returned by ``judge``.

    Returns:
        ``[flag, move]`` for the chosen move, or ``[0, 0]`` when
        ``judge(state, flag)`` returns an empty collection.
    """
    candidates = judge(state, flag)
    count = len(candidates)
    if count != 0:
        pick = random.randint(0, count - 1)
        return [flag, candidates[pick]]
    return [0, 0]


def f(state, action):
    """Run one ``_step(action)`` from ``state`` on a fresh environment.

    The environment is created locally, so the caller's environment object
    is not the one being stepped.

    Returns:
        ``(next_state, done)``; the step's reward is dropped.
    """
    sandbox = Env.OthelloEnv()
    sandbox.state = state
    next_state, _reward, done = sandbox._step(action)
    return next_state, done


# Smoke run: reset an environment, apply one random move for flag 1 through
# f(), and print the resulting state followed by the done indicator.
env = Env.OthelloEnv()
env._reset()
ss, dd = f(
    env.state,
    random_policy(env.state, 1),
)
print(ss, dd, sep="\n")