Esempio n. 1
0
def speed_test():
    """Time the three alpha-beta search variants on KRK games.

    Restores a saved TensorFlow model and plays 20 games.  At every
    non-terminal position a depth-3 search is run with ``alphabeta_native``,
    ``alphabeta_batch_hist`` and ``alphabeta_batch``; only the move returned
    by ``alphabeta_native`` is actually played.  For each variant the
    wall-clock time per move is averaged within a game and then across all
    games that contained at least one move.  The three averages are printed
    on one line, in the order native, batch_hist, batch.

    ``Environment`` and the ``alphabeta_*`` functions are not imported here;
    they are expected to be available at module level.
    """
    from approximator import Approximator
    import time
    import tensorflow as tf
    from learn.preprocessing import faster_featurize
    import settings
    from environment import load_DS

    n_games = 20
    depth = 3
    neg_inf, pos_inf = -float('inf'), float('inf')

    settings.init()
    load_DS('dataset/krk.epd')
    settings.params['PL'] = list('KRkr')
    model_fn = 'Models/stem_leaf/TDLeaf/TDLeaf_stem_or_leaf_7__03_07/TDLeaf_stem_or_leaf_7__03_07-1_13299-0'
    with tf.Session() as sess:
        saver = tf.train.import_meta_graph(model_fn + '.meta')
        saver.restore(sess, model_fn)
        approx = Approximator(sess)
        V = approx.value
        F = faster_featurize

        # Sum over games of the per-move average time of each variant.
        totals = [0.0, 0.0, 0.0]
        # Games that contributed to ``totals`` (i.e. had at least one move).
        timed_games = 0

        for _game in range(n_games):
            env = Environment()
            elapsed = [0.0, 0.0, 0.0]
            mv_cnt = 0

            while not env.is_terminal():
                start = time.time()
                a, _score = alphabeta_native(V, F, env, depth, neg_inf,
                                             pos_inf)
                end1 = time.time()
                # Building the history key list is part of the timed work,
                # as in the original measurement.
                alphabeta_batch_hist(V, F, env, list(env.hist.keys()), depth,
                                     neg_inf, pos_inf)
                end2 = time.time()
                alphabeta_batch(V, F, env, depth, neg_inf, pos_inf)
                end3 = time.time()

                env.perform_action(a)
                elapsed[0] += end1 - start
                elapsed[1] += end2 - end1
                elapsed[2] += end3 - end2
                mv_cnt += 1

            # A game that starts in a terminal position has no moves to
            # average; skip it instead of dividing by zero.
            if mv_cnt:
                timed_games += 1
                for k in range(3):
                    totals[k] += elapsed[k] / mv_cnt

        # Average over the games actually measured (the original divided by
        # a hard-coded 100 although only 20 games are played).
        divisor = max(timed_games, 1)
        print('%s %s %s' % tuple(total / divisor for total in totals))
Esempio n. 2
0
                        help='number of episodes to play',
                        type=int)
    parser.add_argument('-p', help='piece cfg')
    parser.add_argument('-D', help='dset file')
    parser.add_argument(
        '-R',
        default=10,
        type=int,
        help='number of random moves to play before registration')
    parser.add_argument('-d', default=3, type=int, help='depth')
    parser.add_argument('-w', action='store_true')

    args = parser.parse_args()

    settings.init()
    settings.params['USE_DSET'] = True
    settings.params['PL'] = args.p
    load_DS(args.D)
    settings.params['RAND'] = args.R
    settings.params['OC_DEPTH'] = args.d

    model_fn = args.c
    with tf.Session() as sess:
        saver = tf.train.import_meta_graph(model_fn + '.meta')
        saver.restore(sess, model_fn)
        approx = Approximator(sess)
        agent = tdstem.TDStemPlayAgent(approx, depth=3)
        A, evaldict, all_s = opt.recursive_eval_sim(agent, N=args.N, w=args.w)
        with open(args.o, 'wb') as f:
            cp.dump((A, evaldict, all_s), f)
Esempio n. 3
0
def comparison_3p():
    """Plot the 3-piece learning curves and print per-run summaries.

    Loads the pickled training metadata of the ``TDS4`` (TD-Stem) and
    ``TDL4`` (TD-Leaf) runs, initialises the settings / data set, and draws
    both winning-rate curves into subplot 221 of figure 1 (the figure is not
    shown here).  Then, for runs 3..7, prints the leaf run's ``lambda``
    entry, the mean ``wc`` / ``we`` / ``lhs`` values of the stem and leaf
    simulation files, the mean ``mps`` of both runs and the stem run's
    ``episodes`` entry.

    The pickle files are local result files; do not point this at untrusted
    data.
    """
    # Only referenced by the disabled analysis code that follows this block;
    # kept so that re-enabling that code keeps working.
    import tablebases

    def load_pickle(path):
        # Unpickle one results file.
        with open(path, 'rb') as f:
            return cp.load(f)

    def mean_of(values):
        # Arithmetic mean of a sequence, via NumPy.
        return np.mean(np.array(values))

    def learning_curve(meta):
        # Returns (cumulative episodes, winning rate) with an origin point
        # prepended.  The x axis is padded with 0 so it starts at episode 0;
        # the divisor is padded with 1 so the origin is 0/1 = 0 rather than
        # 0/0 = NaN.
        counts = meta['N']
        episodes = np.cumsum(np.array([0] + counts, dtype=float))
        rates = (np.array([0] + meta['w_list'])
                 / np.array([1] + counts, dtype=float))
        return episodes, rates

    stem = load_pickle('Models/3P/TDS4/meta')
    leaf = load_pickle('Models/3P/TDL4/meta')

    settings.init()
    load_DS('dataset/3p_2.epd')
    settings.params['PL'] = 'KQRBNPkqrbnp'
    settings.params['USE_DSET'] = True

    e_l, w_l = learning_curve(leaf)
    e_s, w_s = learning_curve(stem)

    plt.figure(1)
    plt.subplot(221)
    line_stem, = plt.plot(e_s, w_s, label='TD-Stem' + r'$(\lambda)$')
    line_leaf, = plt.plot(e_l, w_l, label='TD-Leaf' + r'$(\lambda)$')
    plt.xlabel(r'$N$')
    plt.ylabel('winning rate')
    # Both curves are labelled, so both belong in the legend.
    plt.legend(handles=[line_leaf, line_stem])

    for i in range(3, 8):
        leaf = load_pickle('Models/3P/TDL{}/meta'.format(i))
        stem = load_pickle('Models/3P/TDS{}/meta'.format(i))
        print(leaf['lambda'])

        _, evaldict, _ = load_pickle('Models/3P/TDL{}/sim'.format(i))
        wcl = mean_of(evaldict['wc'])
        wel = mean_of(evaldict['we'])
        lhsl = mean_of(evaldict['lhs'])

        _, evaldict, _ = load_pickle('Models/3P/TDS{}/sim'.format(i))
        wcs = mean_of(evaldict['wc'])
        wes = mean_of(evaldict['we'])
        lhss = mean_of(evaldict['lhs'])

        print('{}: STEM: {}, {}, {} \tLEAF: {}, {}, {}'.format(
            i, wcs, wes, lhss, wcl, wel, lhsl))
        print('{}, {}'.format(mean_of(stem['mps']), mean_of(leaf['mps'])))
        print(stem['episodes'])

    '''
    Sw=[t[-1] for t in tw]
    print Sw[:100]
    dtmw=[t[1] for t in tw]
    wdlw=[t[0] for t in tw]

    vw_s=Approximator.V(Sw,model_fn)

    hist_wcs=90*[0]
    hist_wes=90*[0]
    hist_lhss=90*[0]
    hist_dcs=90*[0]
    avg_vs=90*[0]
    #print A
    for i in xrange(len(hist_wcs)):
        hist_wcs[i]=np.mean(np.array([wc(t) for t in A if wc(t) is not None and 
                    t[1]==i+1]))
        hist_wes[i]=np.mean(np.array([we(t) for t in A if we(t) is not None and 
                    t[1]==i+1]))
        hist_lhss[i]=np.mean(np.array([lhs(t) for t in A if lhs(t) is not None and 
                    t[1]==i+1]))
        hist_dcs[i]=np.mean(np.array([dc(t) for t in A if dc(t) is not None and 
                    t[1]==i+1]))
        avg_vs[i]=np.mean(np.array([vw_s[j] for j in xrange(vw_s.shape[0]) if
                                   dtmw[j]==i and wdlw[j]==1]))

    '''

    '''
    model_fn='Models/3PIECES_LEAF/TDLeaf_3pieces_leaf_5__25_07/TDLeaf_3pieces_leaf_5__25_07-1_67793-40'
    with open('Models/3PIECES_LEAF/sim','rb') as f:
        A,evaldict,S=cp.load(f)
    dcl=np.mean(np.array(evaldict['dc']))

    tw=[t for t in A if tablebases.probe_result(t[-1])==1]
    td=[t for t in A if tablebases.probe_result(t[-1])==0]
    tb=[t for t in A if tablebases.probe_result(t[-1])==-1]

    Sw=[t[-1] for t in tw]
    dtmw=[t[1] for t in tw]
    wdlw=[t[0] for t in tw]

    vw_l=Approximator.V(Sw,model_fn)
    hist_wcl=90*[0]
    hist_wel=90*[0]
    hist_lhsl=90*[0]
    hist_dcl=90*[0]
    avg_vl=90*[0]
    #print A
    for i in xrange(len(hist_wcs)):
        hist_wcl[i]=np.mean(np.array([wc(t) for t in A if wc(t) is not None and 
                    t[1]==i+1]))
        hist_wel[i]=np.mean(np.array([we(t) for t in A if we(t) is not None and 
                    t[1]==i+1]))
        hist_lhsl[i]=np.mean(np.array([lhs(t) for t in A if lhs(t) is not None and 
                    t[1]==i+1]))
        hist_dcl[i]=np.mean(np.array([dc(t) for t in A if dc(t) is not None and 
                    t[1]==i+1]))
        avg_vl[i]=np.mean(np.array([vw_l[j] for j in xrange(vw_l.shape[0]) if
                                   dtmw[j]==i and wdlw[j]==1]))
    '''

    '''
Esempio n. 4
0
def comparison_stem_leaf_kqk():
    """Compare the TD-Stem and TD-Leaf runs on the KQK endgame.

    Steps, in order:

    1. Print the mean ``wc`` / ``we`` / ``lhs`` values and the training
       summary (mean ``mps``, ``episodes``, ``elapsed_time``) of the
       ``TDL_BAD`` / ``TDS_BAD`` runs.
    2. Load the metadata of the ``TDL`` / ``TDS`` runs, initialise the
       settings / data set and show the learning curves (figure 1) with a
       vertical line at the first element of every ``lambda`` entry.
    3. For each run's ``sim2`` file: print the mean evaluation values, split
       the games by tablebase result (1 / -1), evaluate the network on the
       last element of each game tuple, and bin everything by ``t[1]``
       (plotted on the axis labelled 'DTM') for the values 1..20.
    4. Show bar charts of the binned ``wc`` / ``we`` / ``lhs`` metrics
       (figures 2-4) and the mean network value with a +/- 2 sigma band for
       both tablebase subsets (figure 5).

    ``wc``, ``we``, ``lhs`` and ``Approximator`` are expected to be available
    at module level.  The pickle files are local result files; do not point
    this at untrusted data.
    """
    import tablebases

    n_bins = 20
    dtm_values = range(1, n_bins + 1)
    stem_label = 'TD-Stem' + r'$(\lambda)$'
    leaf_label = 'TD-Leaf' + r'$(\lambda)$'
    stem_band_colour = '#99ccff'
    leaf_band_colour = '#ffc266'

    def load_pickle(path):
        # Unpickle one results file.
        with open(path, 'rb') as f:
            return cp.load(f)

    def mean_of(values):
        # Arithmetic mean of a sequence, via NumPy.
        return np.mean(np.array(values))

    def show(*values):
        # Print the values space-separated on one line (same layout as the
        # Python 2 statement ``print a, b, c``).
        print(' '.join(str(v) for v in values))

    def show_eval_summary(evaldict):
        # Print the mean win-conversion, win-efficiency and loss-holding
        # entries of a simulation's evaluation dictionary.
        show(mean_of(evaldict['wc']), mean_of(evaldict['we']),
             mean_of(evaldict['lhs']))

    def show_training_summary(stem_meta, leaf_meta):
        # Print moves-per-second mean, episode count and elapsed time of
        # both runs, stem first within each pair.
        show(mean_of(stem_meta['mps']), mean_of(leaf_meta['mps']),
             stem_meta['episodes'], leaf_meta['episodes'],
             stem_meta['elapsed_time'], leaf_meta['elapsed_time'])

    def learning_curve(meta):
        # Returns (cumulative episodes, winning rate) with an origin point
        # prepended; the divisor is padded with 1 so the origin is 0/1 = 0
        # rather than 0/0 = NaN.
        counts = meta['N']
        episodes = np.cumsum(np.array([0] + counts, dtype=float))
        rates = (np.array([0] + meta['w_list'])
                 / np.array([1] + counts, dtype=float))
        return episodes, rates

    def metric_by_dtm(games, metric):
        # Mean of ``metric`` over the games in each DTM bin, ignoring games
        # for which the metric is None.  An empty bin yields NaN.
        per_bin = []
        for dtm in dtm_values:
            scores = [metric(t) for t in games if t[1] == dtm]
            kept = [s for s in scores if s is not None]
            per_bin.append(np.mean(np.array(kept)))
        return per_bin

    def value_stats_by_dtm(values, dtms):
        # Mean and standard deviation of the network values in each DTM bin.
        # Both statistics use the same sample so that the +/- 2 sigma band
        # really belongs to the plotted mean.
        means, stds = [], []
        for dtm in dtm_values:
            sample = np.array([values[j] for j in range(values.shape[0])
                               if dtms[j] == dtm])
            means.append(np.mean(sample))
            stds.append(np.std(sample))
        return means, stds

    def analyse(sim_path, model_fn, print_min_dtm):
        # Load one simulation file and return its per-DTM statistics.
        games, evaldict, _ = load_pickle(sim_path)
        show_eval_summary(evaldict)

        # Probe every game once and split by tablebase result.
        probed = [(t, tablebases.probe_result(t[-1])) for t in games]
        won = [t for t, result in probed if result == 1]
        lost = [t for t, result in probed if result == -1]
        dtm_won = [t[1] for t in won]
        dtm_lost = [t[1] for t in lost]
        if print_min_dtm:
            show(min(dtm_won))
            show(min(dtm_lost))

        v_won = Approximator.V([t[-1] for t in won], model_fn)
        v_lost = Approximator.V([t[-1] for t in lost], model_fn)

        avg_won, std_won = value_stats_by_dtm(v_won, dtm_won)
        avg_lost, std_lost = value_stats_by_dtm(v_lost, dtm_lost)
        return {
            'wc': metric_by_dtm(games, wc),
            'we': metric_by_dtm(games, we),
            'lhs': metric_by_dtm(games, lhs),
            'avg_won': avg_won,
            'std_won': std_won,
            'avg_lost': avg_lost,
            'std_lost': std_lost,
        }

    # --- 1. summaries of the "BAD" runs --------------------------------
    _, evaldict, _ = load_pickle('Models/KQK/TDL_BAD/sim')
    show_eval_summary(evaldict)
    _, evaldict, _ = load_pickle('Models/KQK/TDS_BAD/sim')
    show_eval_summary(evaldict)
    leaf = load_pickle('Models/KQK/TDL_BAD/meta')
    stem = load_pickle('Models/KQK/TDS_BAD/meta')
    show_training_summary(stem, leaf)

    # --- 2. learning curves of the main runs ---------------------------
    leaf = load_pickle('Models/KQK/TDL/meta')
    stem = load_pickle('Models/KQK/TDS/meta')

    settings.init()
    load_DS('dataset/kqk_fics.epd')
    settings.params['PL'] = 'KQkq'
    settings.params['USE_DSET'] = True

    e_l, w_l = learning_curve(leaf)
    e_s, w_s = learning_curve(stem)

    stages_s = [t[0] for t in stem['lambda']]
    stages_l = [t[0] for t in leaf['lambda']]
    print(leaf['lambda'])

    plt.figure(1)
    plt.subplot(111)
    line_stem, = plt.plot(e_s, w_s, label=stem_label)
    line_leaf, = plt.plot(e_l, w_l, label=leaf_label)
    for stage in stages_s:
        plt.axvline(x=stage, color=stem_band_colour)
    for stage in stages_l:
        plt.axvline(x=stage, color=leaf_band_colour)
    plt.xlabel(r'$N$')
    plt.ylabel('winning rate')
    plt.legend(handles=[line_leaf, line_stem])
    plt.xlim(0, max(max(e_l), max(e_s)))
    plt.ylim(0, 1)
    plt.show()

    show_training_summary(stem, leaf)

    # --- 3. per-DTM statistics of both runs ----------------------------
    stem_stats = analyse('Models/KQK/TDS/sim2', 'Models/KQK/TDS/network',
                         print_min_dtm=True)
    leaf_stats = analyse('Models/KQK/TDL/sim2', 'Models/KQK/TDL/network',
                         print_min_dtm=False)

    # --- 4. figures -----------------------------------------------------
    x = np.array(range(1, n_bins + 1))

    def bar_figure(number, key, ylabel):
        # Side-by-side stem / leaf bars of one binned metric.
        plt.figure(number)
        plt.subplot(111)
        b1 = plt.bar(x - 1. / 6, stem_stats[key], width=1. / 3,
                     align='center', label=stem_label + ' ')
        b2 = plt.bar(x + 1. / 6, leaf_stats[key], width=1. / 3,
                     align='center', label=leaf_label + ' ')
        plt.legend(handles=[b1, b2])
        plt.xlabel('DTM')
        plt.ylabel(ylabel)
        plt.xlim(0, x.max())
        plt.ylim(0, 1)
        plt.xticks(x, x)
        plt.show()

    def band(avg, std, colour):
        # Draw the mean +/- 2 sigma envelope of one value curve.
        avg = np.array(avg)
        std = np.array(std)
        plt.plot(x, avg + 2 * std, color=colour)
        plt.plot(x, avg - 2 * std, color=colour)

    bar_figure(2, 'wc', 'WCR')
    bar_figure(3, 'we', 'WE')
    bar_figure(4, 'lhs', 'LHS')

    plt.figure(5)
    b1, = plt.plot(x, stem_stats['avg_won'], label=stem_label + ' ')
    b2, = plt.plot(x, leaf_stats['avg_won'], label=leaf_label + ' ')
    band(stem_stats['avg_won'], stem_stats['std_won'], stem_band_colour)
    band(leaf_stats['avg_won'], leaf_stats['std_won'], leaf_band_colour)

    # The curves for the other tablebase subset reuse the colours of the
    # first pair so that each algorithm keeps one colour.
    plt.plot(x, stem_stats['avg_lost'], label=stem_label + ' ',
             color=b1.get_color())
    plt.plot(x, leaf_stats['avg_lost'], label=leaf_label + ' ',
             color=b2.get_color())
    band(stem_stats['avg_lost'], stem_stats['std_lost'], stem_band_colour)
    band(leaf_stats['avg_lost'], leaf_stats['std_lost'], leaf_band_colour)
    plt.xticks(x, x)

    plt.legend(handles=[b1, b2])
    plt.xlabel('DTM')
    plt.ylabel('E[V]')
    plt.xlim(0, x.max())
    plt.show()
Esempio n. 5
0
def comparison_stem_leaf():
    """Compare the TD-Stem and TD-Leaf runs on the KRK endgame.

    Steps, in order:

    1. Initialise the settings / data set, load both runs' metadata and
       print their keys and training summary (mean ``mps``, ``episodes``,
       ``elapsed_time``).
    2. Rebuild the episode axis from the hard-coded stage schedule
       (5000 / 500 / 250 episodes per entry) and show the learning curves
       (figure 1); the training summary is printed a second time afterwards.
    3. For each run's ``sim`` file: print the mean ``wc`` / ``we`` / ``lhs``
       values, evaluate the network on the last element of each game tuple
       and bin everything by ``t[1]`` (plotted on the axis labelled 'DTM')
       for the values 1..33.  Network values are restricted to games whose
       first element equals 1.
    4. Show bar charts of the binned metrics (figures 2-4) and the mean
       network value with a +/- 2 sigma band (figure 5).

    ``wc``, ``we``, ``lhs`` and ``Approximator`` are expected to be available
    at module level.  The pickle files are local result files; do not point
    this at untrusted data.
    """
    n_bins = 33
    dtm_values = range(1, n_bins + 1)
    stem_label = 'TD-Stem' + r'$(\lambda)$'
    leaf_label = 'TD-Leaf' + r'$(\lambda)$'
    stem_band_colour = '#99ccff'
    leaf_band_colour = '#ffc266'

    def load_pickle(path):
        # Unpickle one results file.
        with open(path, 'rb') as f:
            return cp.load(f)

    def mean_of(values):
        # Arithmetic mean of a sequence, via NumPy.
        return np.mean(np.array(values))

    def show(*values):
        # Print the values space-separated on one line (same layout as the
        # Python 2 statement ``print a, b, c``).
        print(' '.join(str(v) for v in values))

    def show_training_summary():
        # Print moves-per-second mean, episode count and elapsed time of
        # both runs, stem first within each pair.
        show(mean_of(stem['mps']), mean_of(leaf['mps']),
             stem['episodes'], leaf['episodes'],
             stem['elapsed_time'], leaf['elapsed_time'])

    def learning_curve(wins, first_cut, second_cut):
        # Returns (cumulative episodes, winning rate).  Entries before
        # ``first_cut`` cover 5000 episodes each, entries before
        # ``second_cut`` 500 each and the remaining ones 250 each.  An origin
        # point (episode 0, rate 0/1 = 0) is prepended.
        episodes = [0]
        per_entry = []
        total = 0
        for i in range(len(wins)):
            if i < first_cut:
                step = 5000
            elif i < second_cut:
                step = 500
            else:
                step = 250
            total += step
            per_entry.append(float(step))
            episodes.append(total)
        rates = np.array([0] + wins) / np.array([1] + per_entry)
        return episodes, rates

    def metric_by_dtm(games, metric):
        # Mean of ``metric`` over the games in each DTM bin, ignoring games
        # for which the metric is None.  An empty bin yields NaN.
        per_bin = []
        for dtm in dtm_values:
            scores = [metric(t) for t in games if t[1] == dtm]
            kept = [s for s in scores if s is not None]
            per_bin.append(np.mean(np.array(kept)))
        return per_bin

    def analyse(sim_path, model_fn):
        # Load one simulation file and return its per-DTM statistics.  Bin k
        # (0-based) always holds DTM k + 1, matching the x axis used for all
        # figures; the original stem loop binned on DTM k instead, which
        # shifted the stem data by one relative to the leaf data.
        games, evaldict, _ = load_pickle(sim_path)
        show(mean_of(evaldict['wc']), mean_of(evaldict['we']),
             mean_of(evaldict['lhs']))

        states = [t[-1] for t in games]
        dtm = [t[1] for t in games]
        wdl = [t[0] for t in games]
        v = Approximator.V(states, model_fn)

        avg_v, std_v = [], []
        for d in dtm_values:
            sample = np.array([v[j] for j in range(v.shape[0])
                               if dtm[j] == d and wdl[j] == 1])
            avg_v.append(np.mean(sample))
            std_v.append(np.std(sample))
        return {
            'wc': metric_by_dtm(games, wc),
            'we': metric_by_dtm(games, we),
            'lhs': metric_by_dtm(games, lhs),
            'avg_v': avg_v,
            'std_v': std_v,
        }

    # --- 1. metadata ----------------------------------------------------
    settings.init()
    settings.params['USE_DSET'] = True
    settings.params['PL'] = 'KRkr'
    load_DS('dataset/krk.epd')

    leaf = load_pickle('Models/stem_leaf/TDLeaf/TDLeaf_stem_or_leaf_7__03_07/stem_or_leaf_7_meta.sv')
    stem = load_pickle('Models/stem_leaf/TDStem/TDStem_stem_or_leaf_7__28_06/stem_or_leaf_7_meta.sv')
    print(leaf.keys())
    print(stem.keys())

    show_training_summary()

    # --- 2. learning curves ----------------------------------------------
    ep_s2, wr_s = learning_curve(stem['w_list'], 53, 73)
    ep_l2, wr_l = learning_curve(leaf['w_list'], 63, 83)

    plt.figure(1)
    plt.subplot(111)
    line_stem, = plt.plot(ep_s2, wr_s, label=stem_label)
    for stage in [100000, 170000, 264000, 275000, 283200, 291000]:
        plt.axvline(x=stage, color=stem_band_colour)
    line_leaf, = plt.plot(ep_l2, wr_l, label=leaf_label)
    for stage in [120000, 220000, 315000, 325250, 333000, 341000]:
        plt.axvline(x=stage, color=leaf_band_colour)
    plt.xlabel(r'$N$')
    plt.ylabel('winning rate')
    plt.legend(handles=[line_leaf, line_stem])
    plt.xlim(0, max(ep_l2))
    plt.ylim(0, 1)
    plt.show()

    # Printed again after the plot window is closed, as before.
    show_training_summary()

    # --- 3. per-DTM statistics -------------------------------------------
    stem_stats = analyse(
        'Models/stem_leaf/TDStem/sim',
        'Models/stem_leaf/TDStem/TDStem_stem_or_leaf_7__28_06/TDStem_stem_or_leaf_7__28_06-1_23116-0')
    leaf_stats = analyse(
        'Models/stem_leaf/TDLeaf/sim',
        'Models/stem_leaf/TDLeaf/TDLeaf_stem_or_leaf_7__03_07/TDLeaf_stem_or_leaf_7__03_07-1_13299-0')

    # --- 4. figures ------------------------------------------------------
    x = np.array(range(1, n_bins + 1))

    def bar_figure(number, key, ylabel):
        # Side-by-side stem / leaf bars of one binned metric.
        plt.figure(number)
        plt.subplot(111)
        b1 = plt.bar(x - 1. / 6, stem_stats[key], width=1. / 3,
                     align='center', label=stem_label + ' ')
        b2 = plt.bar(x + 1. / 6, leaf_stats[key], width=1. / 3,
                     align='center', label=leaf_label + ' ')
        plt.legend(handles=[b1, b2])
        plt.xlabel('DTM')
        plt.ylabel(ylabel)
        plt.show()

    def band(avg, std, colour):
        # Draw the mean +/- 2 sigma envelope of one value curve.
        avg = np.array(avg)
        std = np.array(std)
        plt.plot(x, avg + 2 * std, color=colour)
        plt.plot(x, avg - 2 * std, color=colour)

    bar_figure(2, 'wc', 'WCR')
    bar_figure(3, 'we', 'WE')
    bar_figure(4, 'lhs', 'LHS')

    plt.figure(5)
    plt.subplot(111)
    b1, = plt.plot(x, stem_stats['avg_v'], label=stem_label + ' ')
    b2, = plt.plot(x, leaf_stats['avg_v'], label=leaf_label + ' ')
    band(stem_stats['avg_v'], stem_stats['std_v'], stem_band_colour)
    band(leaf_stats['avg_v'], leaf_stats['std_v'], leaf_band_colour)
    plt.legend(handles=[b1, b2])
    plt.xlabel('DTM')
    plt.ylabel(r'$V$')
    plt.show()
Esempio n. 6
0
    parser.add_argument('--mode', default='stem')
    parser.add_argument('-R', default=2, type=int)
    parser.add_argument('--mk', type=int)
    parser.add_argument('--ocd', default=5, type=int)
    parser.add_argument('--cnn-f', action='store_true')
    args = parser.parse_args()

    settings.init()
    settings.params['USE_DSET'] = args.dset
    settings.params['OC_DEPTH'] = args.ocd
    settings.params['MK'] = args.mk
    change_PL(list(args.piece_conf))
    settings.params['RAND'] = args.R
    print settings.params['PL']
    if settings.params['USE_DSET']:
        load_DS(args.ds_file)
    if args.eps_factor == 0:
        decay_f = lambda n: 1 - 0.01 * n
    else:
        decay_f = lambda n: (n + 1)**(-args.eps_factor)

    if args.old_model == None:
        D = faster_featurize('8/6k1/8/8/3K4/8/8/8 w - -').shape[1]
        print D
        M = [int(m) for m in args.M.split()]
        if args.cnn:
            c0 = len(args.piece_conf) * 2
            if args.cnn_f:
                kwargs = {
                    'D': D,
                    'M': M,