def Liuju(self):
        """Check the draw conditions and settle the hand if one applies.

        Three cases are tested in order:

        1. Four-wind draw: every entry of ``xunshu`` (per-player turn count)
           is 1 and all four players' first discards are the same wind tile
           (``'1z'`` .. ``'4z'``).  ``realfenpei`` is zeroed and the dealer
           repeats.
        2. Four-riichi draw: every entry of ``playerLizhi`` equals 1.
           ``realfenpei`` is set to -1000 each and the dealer repeats.
        3. Exhaustive draw: ``yamaPos`` has reached ``yamaLast - 14``.  Each
           player's shanten number is computed; the dealer
           (``playerWind == 0``) repeats only when tenpai.  When one to three
           players are tenpai, 3000 points are split among them and paid by
           the others, and ``score`` is updated.

        Returns:
            True when the hand ended in a draw (``endSection`` has been set),
            False otherwise.
        """
        # Four-wind draw: only possible while everyone is on their first turn.
        if all(turn == 1 for turn in self.xunshu):
            wind_counts = {'1z': 0, '2z': 0, '3z': 0, '4z': 0}
            for i in range(4):
                river = self.riverStack[i]
                if len(river) > 0 and river[0] in wind_counts:
                    wind_counts[river[0]] += 1
            if max(wind_counts.values()) == 4:
                print('\n4风连打')
                self.realfenpei = [0, 0, 0, 0]
                self.lianzhuang = True
                self.endSection = True
                return True

        # Four-riichi draw.
        if all(state == 1 for state in self.playerLizhi):
            print('\n四家立直', self.score, self.lizhibang)
            self.realfenpei = [-1000, -1000, -1000, -1000]
            self.lianzhuang = True
            self.endSection = True
            return True

        # Exhaustive draw: nothing to do until the wall is used up.
        if self.yamaPos != self.yamaLast - 14:
            return False

        xiangting = [0, 0, 0, 0]
        self.realfenpei = [0, 0, 0, 0]
        for i in range(4):
            pcount = PaiMaker.GetCount(self.handStack[i])
            xiangting[i] = TingJudger.xiangting(pcount, self.fuluStack[i])
            # The dealer keeps the seat only when tenpai.
            if self.playerWind[i] == 0:
                self.lianzhuang = xiangting[i] == 0
                if self.lianzhuang:
                    self.realfenpei[i] = 500

        # Point update (in player-id order).
        ting_sum = xiangting.count(0)
        if 0 < ting_sum < 4:
            # 3000 divides evenly by 1, 2 and 3, so floor division is exact
            # and keeps the scores as integers.
            gain = 3000 // ting_sum
            loss = 3000 // (4 - ting_sum)
            for i in range(4):
                self.realfenpei[i] += gain if xiangting[i] == 0 else -loss
                self.score[i] += self.realfenpei[i]

        print('\n荒牌流局 {} 连庄{}'.format(self.realfenpei, self.lianzhuang))
        for i in range(4):
            print('p{}:{}向听 手牌{}{}'.format(
                i, xiangting[i], PaiMaker.GetSortPai(self.handStack[i]),
                self.fuluStack[i]))
        self.endSection = True
        return True

    def allow_zimo(self, pid, mopai):
        """Build the list of actions player ``pid`` may take after drawing.

        The result is stored in ``self.msgList[pid]`` as a dict with keys
        ``'tile'`` (the drawn tile), ``'lizhi_state'`` (whether the player has
        declared riichi) and ``'operation'``, a list of
        ``{'combination': ..., 'type': ...}`` entries appended in this order:

        * type 1 with an empty combination, only when the player is not in
          riichi (a player in riichi cannot change tiles);
        * one entry per kan found by ``MianziMaker.get_gang_mianzi``, skipped
          on the last tile;
        * type 8 when ``PtJudger.GetFen`` reports a win on the drawn tile;
        * type 7 (riichi) when the hand is closed, tenpai, at least four
          tiles remain and the player has 1000 points or more.

        Args:
            pid: index of the player who drew.
            mopai: the drawn tile; ``'_'`` is appended to mark a self-draw.
        """
        # Tiles left before the last drawable position (yamaLast - 14).
        remaining = (self.yamaLast - 14) - self.yamaPos
        fulu = self.fuluStack[pid]

        msg = {
            'tile': mopai,
            'operation': [],
            'lizhi_state': self.playerLizhi[pid] > 0,
        }
        operations = msg['operation']

        # A player in riichi cannot change tiles.
        if self.playerLizhi[pid] == 0:
            operations.append({'combination': [], 'type': 1})

        # Concealed / added kan is not allowed on the last tile.
        if remaining != 0:
            oriCount = PaiMaker.GetCount(self.handStack[pid])
            gangmz = MianziMaker.get_gang_mianzi(oriCount, fulu, mopai + '_')
            for mz in gangmz:
                operations.append({'combination': mz[0], 'type': mz[1]})

        # Can the player win by self-draw?
        param = self.getPlayerParam(pid)
        param['haidi'] = 1 if remaining == 0 else 0
        ptr = PtJudger.GetFen(self.handStack[pid], fulu, mopai + '_', param)
        if ptr['hupai'] or ptr['defen'] > 0:
            operations.append({'combination': mopai + '_', 'type': 8})

        # Can the player declare riichi?
        # A fresh count is taken here rather than reusing the one handed to
        # get_gang_mianzi, in case that helper modifies its argument.
        paiCount = PaiMaker.GetCount(self.handStack[pid])
        xt = TingJudger.xiangting(paiCount, fulu)
        # The hand is closed when no meld starts with a call marker.
        # NOTE(review): re.match only matches at the start of each meld
        # string; confirm the meld format really puts the marker first,
        # otherwise re.search is what was intended.
        menqing = not any(re.match(r'[\-\+\=](?!\!)', f) for f in fulu)

        if (self.playerLizhi[pid] == 0 and menqing and xt == 0
                and remaining >= 4
                and self.score[pid] >= 1000):
            lizhipai = TingJudger.FindLizhi(self.handStack[pid], fulu,
                                            str(self.riverStack[pid]))
            operations.append({'combination': lizhipai, 'type': 7})

        # The nine-terminals abortive draw is not offered here.
        self.msgList[pid] = msg
 def isPlayerZhenting(self, i):
     """Return whether player ``i`` is in furiten.

     The player's waiting tiles are computed from the hand and melds, then
     ``TingJudger.IsZhenting`` checks them against the string form of the
     player's discard pile.
     """
     waits = TingJudger.tingpai(PaiMaker.GetCount(self.handStack[i]),
                                self.fuluStack[i])
     # Furiten if any waiting tile already appears in the player's river.
     return TingJudger.IsZhenting(waits, str(self.riverStack[i]))
 def fulu_xiangting(self, hand, fulu, pid):
     """Return the smallest shanten number over several target hand shapes.

     The hand is evaluated as: closed hand, value-tile hand (using the round
     wind and the seat wind of player ``pid``, both shifted by one),
     all-simples, all-triplets, and a one-suit hand in each of the three
     suits ``'m'``, ``'p'`` and ``'s'``.  The minimum of those is returned.
     """
     counts = PaiMaker.GetCount(hand)
     candidates = [
         self.xiangting_menqing(counts, fulu),
         self.xiangting_fanpai(counts, fulu, self.changfeng + 1,
                               self.playerWind[pid] + 1),
         self.xiangting_duanyao(counts, fulu),
         self.xiangting_duidui(counts, fulu),
     ]
     # One-suit (full or half flush) evaluation for each suit in turn.
     candidates.extend(
         self.xiangting_yise(counts, fulu, suit) for suit in 'mps')
     return min(candidates)
 def resetParam(self):
     """Reset all per-hand state and build a fresh wall for a new deal."""
     # Per-player containers: hands, discard piles (rivers) and melds.
     self.handStack = [[] for _ in range(4)]
     self.riverStack = [[] for _ in range(4)]
     self.fuluStack = [[] for _ in range(4)]

     # Wall: a newly generated tile stack and its cursors.
     self.yama = PaiMaker.GeneratePai()
     self.yamaPos = -1      # position of the top of the wall
     self.yamaLast = 135    # position used for kan tiles

     # Bonus indicators are revealed backwards through the wall: flip the
     # first one (and the hidden one next to it), then step to the next pair.
     self.baoPos = 130
     self.bao = [self.yama[self.baoPos]]
     self.libao = [self.yama[self.baoPos + 1]]
     self.baoPos -= 2

     # Per-player flags and counters.
     self.playerLizhi = [0] * 4         # riichi state
     self.xunshu = [0] * 4              # turn counts
     self.diyizimo = [True] * 4         # still in the first go-around
     self.zhenting = [False] * 4        # furiten state
     self.gangflag = False              # drawing after a kan
     self.yifa = [False] * 4            # ippatsu state

     # Point distribution of the current hand.
     self.realfenpei = [0] * 4
     # Player whose turn it currently is.
     self.curWind = 0

     # End-of-hand / end-of-game markers.
     self.endSection = False    # the hand is over
     self.lianzhuang = False    # the dealer repeats
     self.endGame = False       # the game is over
    def make_humsg(self, submsg):
        """Score a declared win and apply the resulting point transfers.

        Args:
            submsg: dict with ``'from'`` (index of the winning player) and
                ``'tile'`` (the winning tile; its third character is ``'_'``
                for a self-draw, anything else for a win on another player's
                tile).

        Raises:
            Exception: when ``PtJudger.GetFen`` reports no valid win.

        Side effects: adds the payments to ``self.realfenpei`` and
        ``self.score``, and sets ``self.lianzhuang`` when the winner is the
        dealer (``playerWind == 0``).
        """
        print(submsg)
        pid = submsg['from']
        hand = copy.copy(self.handStack[pid])
        tile = submsg['tile']
        is_zimo = tile[2] == '_'
        if not is_zimo:
            # Win on someone else's tile: it is not in the hand yet.
            hand.append(tile)

        param = self.getPlayerParam(pid)
        # Last-tile flag: 0 = not the last tile, 1 = self-draw, 2 = otherwise.
        param['haidi'] = 0
        if (self.yamaLast - 14) == self.yamaPos:
            param['haidi'] = 1 if is_zimo else 2

        # First-go-around self-draw flag: 1 when param['zhuangfeng'] is 0
        # (presumably the dealer - verify against getPlayerParam), else 2.
        param['tianhu'] = 0
        if self.diyizimo[pid] and is_zimo:
            param['tianhu'] = 1 if param['zhuangfeng'] == 0 else 2

        ptres = PtJudger.GetFen(hand, self.fuluStack[pid], tile, param)
        handstr = PaiMaker.GetSortPai(self.handStack[pid])
        print(pid, '胡了', handstr, tile, self.fuluStack[pid])
        print(param)
        if 'hupai' not in ptres or ptres['hupai'] is None:
            raise Exception('无役和了')
        print(ptres['hupai'])
        print('{}符{}番 {}'.format(ptres['fu'], ptres['fanshu'], ptres['defen']))

        # Apply the payments; 'fenpei' is indexed by seat wind, so map each
        # player to their wind before reading it.
        for i in range(4):
            wind = self.playerWind[i]
            self.realfenpei[i] += ptres['fenpei'][wind]
            self.score[i] += ptres['fenpei'][wind]

        ptres['fenpei'] = self.realfenpei
        # A winning dealer repeats.
        # NOTE(review): the visible body ends without returning ptres;
        # confirm whether callers expect a return value.
        if self.playerWind[pid] == 0:
            self.lianzhuang = True
Beispiel #7
0
def update():
    """Run the Q-learning training loop for five episodes.

    Each episode starts a new game on the module-level ``env`` and repeats:
    collect the legal actions of every player, let the module-level ``RL``
    agent choose one per player, step the environment, and learn from each
    acting player's transition.  When the hand ends (``env.endSection``),
    every player with a positive reward replays their whole recorded
    trajectory with that reward, and the episode finishes.
    """
    for episode in range(5):
        print('episode{} start\n'.format(episode))
        start = time.time()
        # Initial observation.
        observation = env.newgame()
        # Every (state, action) pair seen since the deal, per player.
        statelist = [[], [], [], []]
        while True:
            # Refresh the environment.
            env.render()
            # Legal operations for each player.
            possible = observation.getActions()
            # The agent's choice per player.
            action = [None, None, None, None]
            # Encoded state each acting player chose from, so learning below
            # uses that player's own state.
            codes = [None, None, None, None]
            # Messages sent back to the environment.
            msgList = [None, None, None, None]
            for k, act in enumerate(possible):
                if act is None or len(act) == 0:
                    continue
                res = Plist(act, observation, k)
                code = PaiMaker.coding(observation, k)
                codes[k] = code
                # RL chooses an action based on the observation.
                action[k] = RL.choose_action(code, list(res.keys()))
                # Record state and action for end-of-hand replay.
                statelist[k].append({'state': code, 'action': action[k]})
                msgList[k] = msgBuilder(action[k], k, res)

            # Take the actions and get the next observation and rewards.
            observation_, reward, done = env.step(msgList)

            for k, act in enumerate(action):
                if act is None:
                    continue
                code_ = PaiMaker.coding(observation_, k)
                # Learn from player k's own transition.
                RL.learn(codes[k], act, reward[k], code_)

            # Swap observation.
            observation = observation_

            if env.endSection:
                # Rewarded players learn from their whole trajectory.
                for i in range(4):
                    if reward[i] > 0:
                        st = statelist[i]
                        print(st)
                        for k, step in enumerate(st):
                            if k + 1 == len(st):
                                state_next = 'terminal'
                            else:
                                state_next = st[k + 1]['state']
                            RL.learn(step['state'], step['action'],
                                     reward[i], state_next)
                # Clear the recorded trajectories.
                statelist = [[], [], [], []]
                break

            # Leave the loop when the episode is over.
            if done:
                break
        end = time.time()
        print('episode{} end, time:{:.2f}, total {} game played'.format(episode,end-start,env.pp))
        # Saving the Q-table every 5 episodes is currently disabled:
        #if episode % 5 == 0:
        #RL.save()

    print('train end')
Beispiel #8
0
        t = opt[action]['type']
        if(t == 5):
            dic['type'] = 'chipenggang'
        else:
            dic['type'] = 'angangjiagang'
        combination = opt[action]['combination']
        dic['combination'] = [combination, t]
    elif(action == 40):
        print(opt[action])
        t = opt[action]['type']
        dic['tile'] = opt[action]['combination']
        if(t == 8):
            dic['type'] = 'hu'
        elif(t == 9):
            dic['type'] = 'zimo'
    elif(action == 41):
        pass
    return dic

if __name__ == "__main__":
    # Visualisation flag; nothing in this block reads it.
    vis = False
    print(len(sys.argv))
    # Extra command-line arguments are accepted but ignored: the switch that
    # would turn visualisation on (vis = True) is disabled.

    # update() reads these two module-level names.
    env = Environment(
        rule=Rule(),
        yama=PaiMaker.GeneratePai(1),
    )
    RL = QLearningTable(actions=list(range(41)))
    update()

    # GUI-driven alternative, currently disabled:
    # env.after(100, update)
    # env.mainloop()