Ejemplo n.º 1
0
def try_mro():
    """Pit two MrO players (seats 0 and 2) against two MrIf players (seats 1 and 3).

    Plays 64 fresh deals, each followed by one replay with the card list
    rotated by 13. After every deal/replay pair the summed margin of that
    pair is printed; at the end the mean margin over all games and its
    standard error are printed.

    The margin of one game is ``s[0] + s[2] - s[1] - s[3]`` where ``s`` is the
    value returned by ``OfflineInterface.clear()`` (presumably the per-seat
    scores -- confirm against OfflineInterface).
    """
    from MrIf import MrIf
    from OfflineInterface import OfflineInterface
    import numpy

    # An earlier checkpoint that was also tried: "MrO_Net_9_164609.pkl"
    net_file = "MrO_Net_15_395265.pkl"
    mro_seat0 = MrO(net_file, room=0, place=0, name='o0')
    mro_seat2 = MrO(net_file, room=0, place=2, name='o2')
    if_seat1 = MrIf(room=0, place=1, name="if1")
    if_seat3 = MrIf(room=0, place=3, name="if3")
    table = OfflineInterface([mro_seat0, if_seat1, mro_seat2, if_seat3],
                             print_flag=False)

    deals = 64
    games_per_deal = 2
    results = []
    for _deal in range(deals):
        for replay in range(games_per_deal):
            if replay == 0:
                deck = table.shuffle()
            else:
                # Move the last 13 cards to the front and deal that order again.
                deck = deck[39:52] + deck[0:39]
                table.shuffle(cards=deck)
            # 13 tricks x 4 seats = 52 single-card steps.
            for _play in range(13 * 4):
                table.step()
            results.append(table.clear())
            if replay == games_per_deal - 1:
                pair_margin = sum(s[0] + s[2] - s[1] - s[3]
                                  for s in results[-games_per_deal:])
                print("%4d" % (pair_margin), end=" ", flush=True)
            table.prepare_new()

    margins = [s[0] + s[2] - s[1] - s[3] for s in results]
    mean_margin = numpy.mean(margins)
    std_error = numpy.sqrt(numpy.var(margins) / (len(margins) - 1))
    print("%.2f %.2f" % (mean_margin, std_error,))
Ejemplo n.º 2
0
def gen_data_for_o(N1=2,N2=1,save=False):
    """Run MrGreed self-play and return the module-level ``for_o`` buffers.

    ``for_o`` is reset to a tuple of four empty lists before play starts.
    This function never appends to it itself, so it is presumably filled by
    the MrGreed players while they play (confirm in MrGreed).

    N1   -- number of fresh deals
    N2   -- games per deal; every game after the first reuses the previous
            card list rotated by 13
    save -- when true, pickle ``for_o`` to "Greed_<N1>.data" and log "saved"
    """
    from OfflineInterface import OfflineInterface
    import pickle
    global for_o
    for_o = ([], [], [], [])

    players = [MrGreed(room=0, place=seat, name='greed%d' % (seat))
               for seat in range(4)]
    table = OfflineInterface(players, print_flag=False)

    results = []  # collected per game; not read again in this function
    for _deal in range(N1):
        for replay in range(N2):
            if replay == 0:
                deck = table.shuffle()
            else:
                # Move the last 13 cards to the front and deal that order again.
                deck = deck[39:52] + deck[0:39]
                table.shuffle(cards=deck)
            # 13 tricks x 4 seats = 52 single-card steps.
            for _play in range(13 * 4):
                table.step()
            results.append(table.clear())
            print(".", end=" ", flush=True)
            table.prepare_new()
    print("")

    if save:
        with open("Greed_%d.data" % (N1), 'wb') as out_file:
            pickle.dump(for_o, out_file)
        log("saved")
    return for_o
Ejemplo n.º 3
0
def optimize_target(paras):
    """
        Objective function called by optimize_para to tune MrGreed.

        Sets SHORT_PREFERENCE of the two MrGreed players (seats 0 and 2) to
        paras[0]*100, plays 256 deals x 2 games against two MrIf players
        (seats 1 and 3), prints the mean margin and its standard error, and
        returns the mean margin.

        The surrounding module should import:
    from MrIf import MrIf
    from OfflineInterface import OfflineInterface
    import numpy
    """
    print(paras, end=" ", flush=True)

    greed_seat0 = MrGreed(room=0, place=0, name='greed0')
    greed_seat2 = MrGreed(room=0, place=2, name='greed2')
    if_seat1 = MrIf(room=0, place=1, name="if1")
    if_seat3 = MrIf(room=0, place=3, name="if3")
    greed_seat0.SHORT_PREFERENCE = paras[0] * 100
    greed_seat2.SHORT_PREFERENCE = paras[0] * 100
    table = OfflineInterface([greed_seat0, if_seat1, greed_seat2, if_seat3],
                             print_flag=False)

    deals = 256
    games_per_deal = 2
    results = []
    for _deal in range(deals):
        for replay in range(games_per_deal):
            if replay == 0:
                deck = table.shuffle()
            else:
                # Move the last 13 cards to the front and deal that order again.
                deck = deck[39:52] + deck[0:39]
                table.shuffle(cards=deck)
            # 13 tricks x 4 seats = 52 single-card steps.
            for _play in range(13 * 4):
                table.step()
            results.append(table.clear())
            table.prepare_new()

    # Margin of seats 0 and 2 over seats 1 and 3, one value per game.
    margins = [s[0] + s[2] - s[1] - s[3] for s in results]
    std_error = numpy.sqrt(numpy.var(margins) / (len(margins) - 1))
    print("%.2f %.2f" % (numpy.mean(margins), std_error,))
    return numpy.mean(margins)
Ejemplo n.º 4
0
def benchmark_B(handsfile):
    """Benchmark MrZeroTree (seats 0 and 2) against MrGreed on a file of hands.

    Every hand read from ``handsfile`` via ``read_std_hands`` is played with
    ``play_a_test(interface, hand, 2)``; each result is printed as it
    arrives, and the mean and standard error of all results are logged.
    """
    from MrIf import MrIf
    from MrGreed import MrGreed
    from MrZeroTree import MrZeroTree
    from OfflineInterface import OfflineInterface, read_std_hands, play_a_test

    if_players = [MrIf(room=255, place=seat, name='I%d' % (seat))
                  for seat in range(4)]
    greed_players = [MrGreed(room=255, place=seat, name='G%d' % (seat))
                     for seat in range(4)]
    zero_players = []
    for seat in (0, 2):
        zero_players.append(
            MrZeroTree(room=255, place=seat, name='Z%d' % (seat),
                       mcts_b=10, mcts_k=2, sample_b=-1, sample_k=-2))

    # Greed-vs-If table: constructed as in the original but not played here.
    greed_vs_if = OfflineInterface(
        [greed_players[0], if_players[1], greed_players[2], if_players[3]],
        print_flag=False)
    zero_vs_greed = OfflineInterface(
        [zero_players[0], greed_players[1], zero_players[1], greed_players[3]],
        print_flag=False)

    results = []
    # read_std_hands yields pairs; only the second item (the hand) is used.
    for _index, hand in read_std_hands(handsfile):
        outcome = play_a_test(zero_vs_greed, hand, 2)
        results.append(outcome)
        print("%4d" % (outcome, ), end=" ", flush=True)
    # Terminate the progress line once every hand has been played.
    print("")

    mean_result = numpy.mean(results)
    std_error = numpy.sqrt(numpy.var(results) / (len(results) - 1))
    log("benchmark result: %.2f %.2f" % (mean_result, std_error))
Ejemplo n.º 5
0
def benchmark():
    """Benchmark MrIf (seats 0 and 2) against MrGreed (seats 1 and 3).

    Plays 1024 deals x 2 games (the second game of a deal reuses the card
    list rotated by 13), prints the average margin of every deal as it
    finishes, then logs the elapsed time, per-seat mean and standard error,
    and the mean and standard error of the margin
    ``s[0] + s[2] - s[1] - s[3]``.
    """
    from MrRandom import MrRandom, Human
    from MrIf import MrIf
    from OfflineInterface import OfflineInterface

    # All four families are instantiated for every seat, as in the original,
    # even though only MrIf and MrGreed are seated below.
    greed_players = [MrGreed(room=0, place=seat, name='greed%d' % (seat))
                     for seat in range(4)]
    if_players = [MrIf(room=0, place=seat, name="if%d" % (seat))
                  for seat in range(4)]
    random_players = [MrRandom(room=0, place=seat, name="random%d" % (seat))
                      for seat in range(4)]
    randtree_players = [MrRandTree(room=0, place=seat,
                                   name='randtree%d' % (seat))
                        for seat in range(4)]

    table = OfflineInterface(
        [if_players[0], greed_players[1], if_players[2], greed_players[3]],
        print_flag=False)

    deals = 1024
    games_per_deal = 2
    results = []
    log("%s vs. %s for %dx%d" % (table.players[0].family_name(),
                                 table.players[1].family_name(),
                                 deals, games_per_deal))

    started = time.time()
    for _deal in range(deals):
        for replay in range(games_per_deal):
            if replay == 0:
                deck = table.shuffle()
            else:
                # Move the last 13 cards to the front and deal that order again.
                deck = deck[39:52] + deck[0:39]
                table.shuffle(cards=deck)
            # 13 tricks x 4 seats = 52 single-card steps.
            # Debug hook that used to sit after step() for trick 7, seat 2:
            #     global print_level
            #     print_level=1
            #     offlineinterface.print_flag=True
            #     log("start outputs")
            for _play in range(13 * 4):
                table.step()
            results.append(table.clear())
            table.prepare_new()
            if replay == games_per_deal - 1:
                deal_margin = sum(
                    [s[0] + s[2] - s[1] - s[3]
                     for s in results[-games_per_deal:]]) / games_per_deal
                print("%4d" % (deal_margin), end=" ", flush=True)
    finished = time.time()
    log("time consume: %ds" % (finished - started))

    for seat in range(4):
        seat_scores = [s[seat] for s in results]
        seat_mean = numpy.mean(seat_scores)
        seat_err = numpy.sqrt(numpy.var(seat_scores) / (len(seat_scores) - 1))
        log("%dth player: %.2f %.2f" % (seat, seat_mean, seat_err,), l=2)

    margins = [s[0] + s[2] - s[1] - s[3] for s in results]
    margin_err = numpy.sqrt(numpy.var(margins) / (len(margins) - 1))
    log("%.2f %.2f" % (numpy.mean(margins), margin_err))
Ejemplo n.º 6
0
def benchmark_transitivity(print_process=False):
    """Benchmark a MrZeroTreeSimple pair (seats 0 and 2) against a MrIf pair.

    Loads the network weights from "Zero-29th-25-11416629-720.pt" onto
    cuda:0, plays 256 deals x 2 games (the second game of a deal reuses the
    card list rotated by 13) and reports through ``bench_stat`` after every
    quarter of the deals and once more at the end.

    print_process -- when true, log a detailed line per deal (deal index,
                     average margin, raw results of the pair); otherwise
                     print only the average margin.
    """
    from MrRandom import MrRandom
    from MrIf import MrIf
    from MrGreed import MrGreed
    from MrRandTree import MrRandTree
    from MrZ_NETs import PV_NET_2
    from MrZeroTreeSimple import MrZeroTreeSimple
    from OfflineInterface import OfflineInterface
    import itertools
    import torch
    import random
    import inspect

    device_bench = torch.device("cuda:0")
    weights_file = "Zero-29th-25-11416629-720.pt"
    weights = torch.load(weights_file, map_location=device_bench)
    pv_net_0 = PV_NET_2()
    pv_net_0.load_state_dict(weights)
    pv_net_0.to(device_bench)

    # Alternatives previously tried for either team: MrRandTree, MrGreed,
    # MrRandom (their imports are kept above).
    team0 = []
    for seat in (0, 2):
        team0.append(MrZeroTreeSimple(room=255, place=seat,
                                      name='zts%d' % (seat),
                                      pv_net=pv_net_0, device=device_bench,
                                      mcts_b=10, mcts_k=2,
                                      sample_b=9, sample_k=0))
    team1 = [MrIf(room=255, place=seat, name='if%d' % (seat))
             for seat in (1, 3)]
    interface = OfflineInterface([team0[0], team1[0], team0[1], team1[1]],
                                 print_flag=False)

    N1 = 256
    N2 = 2
    seats = interface.players
    log("(%s+%s) v.s. (%s+%s) for %dx%d" % (
        seats[0].family_name(), seats[2].family_name(),
        seats[1].family_name(), seats[3].family_name(), N1, N2))
    if seats[0].family_name().startswith("MrZeroTree"):
        lead = seats[0]
        log("mcts_b/k: %d/%d, sample_b/k: %d/%d" % (
            lead.mcts_b, lead.mcts_k, lead.sample_b, lead.sample_k))

    report_every = N1 // 4
    results = []
    for deal, replay in itertools.product(range(N1), range(N2)):
        if replay == 0:
            deck = interface.shuffle()
        else:
            # Move the last 13 cards to the front and deal that order again.
            deck = deck[39:52] + deck[0:39]
            interface.shuffle(cards=deck)
        # 13 tricks x 4 seats = 52 single-card steps.
        for _play in range(13 * 4):
            interface.step()
        results.append(interface.clear())
        interface.prepare_new()

        if replay != N2 - 1:
            continue
        # Last game of this deal: report the deal's average margin.
        latest = results[-N2:]
        deal_margin = sum([s[0] + s[2] - s[1] - s[3] for s in latest]) / N2
        if print_process:
            log("%2d %4d: %s" % (deal, deal_margin, latest))
        else:
            print("%4d" % (deal_margin), end=" ", flush=True)
        if (deal + 1) % report_every == 0:
            bench_stat(results, N2, None)
    bench_stat(results, N2, None)
def prepare_data_queue(pv_net, device_num, data_rounds, train_b, train_k,
                       data_queue):
    """Generate self-play training data and put it on ``data_queue``.

    The function first blocks on ``input("not using")``, which marks it as
    retired. After that it moves ``pv_net`` to ``cuda:<device_num>``, lets
    four MrZeroTreeSimple players in train mode play ``data_rounds`` games
    through ``step_complete_info`` and finally puts each player's
    ``train_datas`` on the queue without blocking.
    """
    input("not using")
    device_train = torch.device("cuda:%d" % (device_num))
    pv_net.to(device_train)

    players = []
    for seat in range(4):
        players.append(MrZeroTreeSimple(room=0,
                                        place=seat,
                                        name='zerotree%d' % (seat),
                                        pv_net=pv_net,
                                        device=device_train,
                                        train_mode=True,
                                        mcts_b=train_b,
                                        mcts_k=train_k))
    interface = OfflineInterface(players, print_flag=False)

    results = []  # collected per game; not read again in this function
    for _round in range(data_rounds):
        interface.shuffle()
        # One step per card: 52 in total.
        for _play in range(52):
            interface.step_complete_info()
        results.append(interface.clear())
        interface.prepare_new()

    for player in players:
        data_queue.put(player.train_datas, block=False)
def benchmark(save_name, epoch, device_num, print_process=False):
    """
        Benchmark the raw network against MrGreed; called by the trainer.

        Two MrZeroTreeSimple players (seats 0 and 2, mcts_b=0, mcts_k=1)
        built from ``save_name`` on ``cuda:<device_num>`` play 512 deals x 2
        games against two MrGreed players (seats 1 and 3). The margins of
        the two games of a deal are averaged, and the mean and standard
        error of those per-deal averages are logged together with ``epoch``.

        print_process -- when true, print each deal's average margin.
    """
    import numpy

    N1 = 512
    N2 = 2
    log("start benchmark against MrGreed for %dx%d" % (N1, N2))

    zero_players = []
    for seat in (0, 2):
        zero_players.append(MrZeroTreeSimple(room=255,
                                             place=seat,
                                             name='zerotree%d' % (seat),
                                             pv_net=save_name,
                                             device="cuda:%d" % (device_num),
                                             mcts_b=0,
                                             mcts_k=1,
                                             sample_b=BENCH_SMP_B,
                                             sample_k=BENCH_SMP_K))
    greed_players = [MrGreed(room=255, place=seat, name='greed%d' % (seat))
                     for seat in (1, 3)]
    interface = OfflineInterface(
        [zero_players[0], greed_players[0], zero_players[1], greed_players[1]],
        print_flag=False)

    results = []
    for _deal in range(N1):
        for replay in range(N2):
            if replay == 0:
                deck = interface.shuffle()
            else:
                # Move the last 13 cards to the front and deal that order again.
                deck = deck[39:52] + deck[0:39]
                interface.shuffle(cards=deck)
            # 13 tricks x 4 seats = 52 single-card steps.
            for _play in range(13 * 4):
                interface.step()
            results.append(interface.clear())
            interface.prepare_new()
            if print_process and replay == N2 - 1:
                deal_margin = sum(
                    [s[0] + s[2] - s[1] - s[3] for s in results[-N2:]]) / N2
                print("%4d" % (deal_margin), end=" ", flush=True)

    # Per-game margins, then averaged over the N2 games of each deal.
    game_margins = [s[0] + s[2] - s[1] - s[3] for s in results]
    deal_margins = [sum(game_margins[start:start + N2]) / N2
                    for start in range(0, len(game_margins), N2)]
    std_error = numpy.sqrt(numpy.var(deal_margins) / (len(deal_margins) - 1))
    log("benchmark at epoch %s's result: %.2f %.2f" %
        (epoch, numpy.mean(deal_margins), std_error))
Ejemplo n.º 9
0
def benchmark(handsfile, print_process=False):
    """Benchmark MrZeroTree (seats 0 and 2) against MrGreed on a hands file.

    The source of ``MrZeroTree.decide_rect_necessity`` is logged first so the
    run records which version was benchmarked. Every hand read from
    ``handsfile`` is then played with ``play_a_test(interface, hand, 2)``;
    each result is printed, ``bench_stat`` is called after every quarter of
    the hands and once more at the end.

    handsfile     -- path handed to ``read_std_hands``; it is iterated as
                     ``(k, hand)`` pairs and ``k`` is assumed to be the
                     0-based index of the hand (confirm in read_std_hands)
    print_process -- accepted for interface compatibility; not used here
    """
    from MrGreed import MrGreed
    from MrZeroTree import MrZeroTree
    from OfflineInterface import OfflineInterface, read_std_hands, play_a_test
    import torch, inspect

    log_source(inspect.getsource(MrZeroTree.decide_rect_necessity))
    #log_source(inspect.getsource(MrZeroTree.possi_rectify_pvnet))

    zt0 = [
        MrZeroTree(room=255,
                   place=i,
                   name='zerotree%d' % (i),
                   mcts_b=10,
                   mcts_k=2,
                   sample_b=-1,
                   sample_k=-2) for i in [0, 2]
    ]
    team1 = [MrGreed(room=255, place=i, name='greed%d' % (i)) for i in [1, 3]]
    interface = OfflineInterface([zt0[0], team1[0], zt0[1], team1[1]],
                                 print_flag=False)

    if interface.players[0].family_name().startswith("MrZeroTree"):
        p0 = interface.players[0]
        log("mcts_b/k: %d/%d, sample_b/k: %d/%d" %
            (p0.mcts_b, p0.mcts_k, p0.sample_b, p0.sample_k))

    hands = read_std_hands(handsfile)
    N1 = len(hands)
    N2 = 2
    log("%s for %dx%d on %s" % (interface, N1, N2, zt0[0].device))

    # With fewer than 4 hands N1 // 4 is 0 and the modulo below would raise
    # ZeroDivisionError; in that case interim reports are skipped and only
    # the final bench_stat call runs.
    quarter = N1 // 4
    stats = []
    for k, hand in hands:
        stats.append(play_a_test(interface, hand, N2))
        print("%4d" % (stats[-1], ), end=" ", flush=True)
        if quarter and (k + 1) % quarter == 0:
            bench_stat(stats, N2)
    bench_stat(stats, N2)
Ejemplo n.º 10
0
from MrIf import LOGFILE,log,cards_order,MrRandom,Human,MrIf
from OfflineInterface import OfflineInterface

if __name__=="__main__":
    # Create a set of robot (player) objects.
    # A robot object should provide the following methods:
    #   receive_shuffle(self,cards)  receive the dealt cards
    #   pick_a_card(self,suit)       choose which card to play WITHOUT updating
    #                                its data structures; the update waits until
    #                                offlineinterface calls the robot's pop_card
    #   pop_card(self,which)         called once the card is confirmed as played;
    #                                updates the hand data structure
    # offlineinterface cannot yet pass history information to the robots;
    # define your own function for that.
    random0=MrRandom(0,0,"random0")
    random1=MrRandom(0,1,"random1")
    random2=MrRandom(0,2,"random2")
    random3=MrRandom(0,3,"random3")
    if0=MrIf(0,0,"if0")
    if1=MrIf(0,1,"if1")
    if2=MrIf(0,2,"if2")
    # Seat 3: the original passed place=2 here, duplicating if2's seat.
    if3=MrIf(0,3,"if3")
    # Initialise OfflineInterface with four robots (a robot may also be a
    # Human, which lets a person join the game).
    offlineinterface=OfflineInterface([if0,random1,if2,random3])
    # Deal the cards.
    offlineinterface.shuffle()
    # Alternatively, specify which cards to deal:
    #offlineinterface.shuffle(cards=cards)
    # Play all 52 cards (one step per card).
    for i in range(52):
        offlineinterface.step()
    # Log the scores.
    log(offlineinterface.clear())
    # Get ready for a new game.
    offlineinterface.prepare_new()