def _kqk_load_pickle(path):
    """Return the object unpickled from the file at ``path``.

    NOTE(review): unpickling can execute arbitrary code.  The paths used
    here look like locally produced artefacts; never point this at
    untrusted files.
    """
    with open(path, 'rb') as handle:
        return cp.load(handle)


def _kqk_print_row(*values):
    """Print ``values`` on one line, separated by single spaces."""
    print(' '.join(str(value) for value in values))


def _kqk_print_sim_summary(evaldict):
    """Print the means of the 'wc', 'we' and 'lhs' entries of ``evaldict``."""
    _kqk_print_row(*[np.mean(np.array(evaldict[key]))
                     for key in ('wc', 'we', 'lhs')])


def _kqk_print_meta_summary(stem_meta, leaf_meta):
    """Print mean 'mps', 'episodes' and 'elapsed_time' for stem then leaf."""
    _kqk_print_row(np.mean(np.array(stem_meta['mps'])),
                   np.mean(np.array(leaf_meta['mps'])),
                   stem_meta['episodes'], leaf_meta['episodes'],
                   stem_meta['elapsed_time'], leaf_meta['elapsed_time'])


def _kqk_learning_curve(meta):
    """Return ``(episodes, rate)`` for the curve drawn in figure 1.

    ``episodes`` is the cumulative sum of ``[0] + meta['N']`` and ``rate`` is
    ``[0] + meta['w_list']`` divided element-wise by the stage sizes.
    """
    stage_sizes = meta['N']
    episodes = np.cumsum(np.array([0] + stage_sizes, dtype=float))
    # The prepended origin point has 0 wins; divide it by 1 instead of 0 so
    # the curve starts at (0, 0) rather than at 0/0 = NaN.
    denominators = np.array([1] + stage_sizes, dtype=float)
    rate = np.array([0] + meta['w_list']) / denominators
    return episodes, rate


def _kqk_bin_means(records, score_fn, n_bins):
    """Return the mean of ``score_fn(rec)`` per bin ``rec[1] == 1..n_bins``.

    Records for which ``score_fn`` returns ``None`` are ignored.  A bin with
    no records yields NaN (``np.mean`` of an empty array).
    """
    scored = []
    for rec in records:
        score = score_fn(rec)  # evaluated once per record, reused per bin
        if score is not None:
            scored.append((rec[1], score))
    return [np.mean(np.array([s for d, s in scored if d == depth]))
            for depth in range(1, n_bins + 1)]


def _kqk_value_stats(values, dtm, wdl, n_bins):
    """Return ``(means, stds)`` of ``values`` per bin ``dtm == 1..n_bins``."""
    means, stds = [], []
    for depth in range(1, n_bins + 1):
        in_bin = [j for j in range(values.shape[0]) if dtm[j] == depth]
        means.append(np.mean(np.array([values[j] for j in in_bin])))
        # NOTE(review): as in the original, the spread only uses entries
        # with wdl == 1 while the mean uses the whole bin -- confirm that
        # this asymmetry is intended.
        stds.append(np.std(np.array([values[j] for j in in_bin
                                     if wdl[j] == 1])))
    return means, stds


def _kqk_evaluate(sim_path, model_fn, probe_result, n_bins,
                  report_min_dtm=False):
    """Load one simulation file and compute all per-DTM statistics.

    The file must unpickle to a 3-tuple ``(records, evaldict, _)``.  For each
    record, ``rec[-1]`` is passed to ``probe_result`` and to
    ``Approximator.V``, ``rec[1]`` is the bin index (plotted as 'DTM') and
    ``rec[0]`` is the value compared against 1 in the std filter.

    Returns a dict with the binned 'wc', 'we', 'lhs' means and the value
    mean/std lists for records probing as 1 ('pos') and as -1 ('neg').
    """
    records, evaldict, _ = _kqk_load_pickle(sim_path)
    _kqk_print_sim_summary(evaldict)

    # Probe every record once; presumably 1 / -1 are wins for either side
    # and 0 a draw -- confirm against the tablebases module.
    outcomes = [probe_result(rec[-1]) for rec in records]
    pos = [rec for rec, res in zip(records, outcomes) if res == 1]
    neg = [rec for rec, res in zip(records, outcomes) if res == -1]

    dtm_pos = [rec[1] for rec in pos]
    dtm_neg = [rec[1] for rec in neg]
    if report_min_dtm:
        print(min(dtm_pos))
        print(min(dtm_neg))

    values_pos = Approximator.V([rec[-1] for rec in pos], model_fn)
    values_neg = Approximator.V([rec[-1] for rec in neg], model_fn)

    pos_mean, pos_std = _kqk_value_stats(
        values_pos, dtm_pos, [rec[0] for rec in pos], n_bins)
    neg_mean, neg_std = _kqk_value_stats(
        values_neg, dtm_neg, [rec[0] for rec in neg], n_bins)

    return {
        'wc': _kqk_bin_means(records, wc, n_bins),
        'we': _kqk_bin_means(records, we, n_bins),
        'lhs': _kqk_bin_means(records, lhs, n_bins),
        'pos_mean': pos_mean, 'pos_std': pos_std,
        'neg_mean': neg_mean, 'neg_std': neg_std,
    }


def _kqk_bar_figure(number, x, stem_values, leaf_values, ylabel):
    """Show figure ``number``: stem and leaf bars side by side per DTM."""
    plt.figure(number)
    plt.subplot(111)
    stem_bars = plt.bar(x - 1. / 6, stem_values, width=1. / 3,
                        align='center', label=r'TD-Stem$(\lambda)$ ')
    leaf_bars = plt.bar(x + 1. / 6, leaf_values, width=1. / 3,
                        align='center', label=r'TD-Leaf$(\lambda)$ ')
    plt.legend(handles=[stem_bars, leaf_bars])
    plt.xlabel('DTM')
    plt.ylabel(ylabel)
    plt.xlim(0, x.max())
    plt.ylim(0, 1)
    plt.xticks(x, x)
    plt.show()


def _kqk_plot_band(x, mean, std, color):
    """Draw the two curves ``mean + 2*std`` and ``mean - 2*std``."""
    mean = np.array(mean)
    std = np.array(std)
    plt.plot(x, mean + 2 * std, color=color)
    plt.plot(x, mean - 2 * std, color=color)


def _kqk_value_figure(x, stem, leaf):
    """Show figure 5: mean network value per DTM with +/- 2 std bands."""
    stem_band, leaf_band = '#99ccff', '#ffc266'
    plt.figure(5)
    stem_line, = plt.plot(x, stem['pos_mean'], label=r'TD-Stem$(\lambda)$ ')
    leaf_line, = plt.plot(x, leaf['pos_mean'], label=r'TD-Leaf$(\lambda)$ ')
    _kqk_plot_band(x, stem['pos_mean'], stem['pos_std'], stem_band)
    _kqk_plot_band(x, leaf['pos_mean'], leaf['pos_std'], leaf_band)
    # The second group of curves reuses the colours of the first group.
    plt.plot(x, stem['neg_mean'], label=r'TD-Stem$(\lambda)$ ',
             color=stem_line.get_color())
    plt.plot(x, leaf['neg_mean'], label=r'TD-Leaf$(\lambda)$ ',
             color=leaf_line.get_color())
    _kqk_plot_band(x, stem['neg_mean'], stem['neg_std'], stem_band)
    _kqk_plot_band(x, leaf['neg_mean'], leaf['neg_std'], leaf_band)
    plt.xticks(x, x)
    plt.legend(handles=[stem_line, leaf_line])
    plt.xlabel('DTM')
    plt.ylabel('E[V]')
    plt.xlim(0, x.max())
    plt.show()


def comparison_stem_leaf_kqk():
    """Compare the TD-Stem and TD-Leaf KQK models with printed stats and plots.

    Steps, in order:

    1. Print summary statistics of the ``TDL_BAD`` / ``TDS_BAD`` simulation
       and meta files.
    2. Load the ``TDL`` / ``TDS`` meta files, initialise ``settings`` and the
       data set, and show the learning curves (figure 1) with a vertical
       line at the first element of each ``meta['lambda']`` entry.
    3. Evaluate both ``sim2`` files and show per-DTM bar charts of the
       'wc', 'we' and 'lhs' scores (figures 2-4) and the mean network value
       with +/- 2 std bands (figure 5).

    Every ``plt.show()`` call is kept, so the figures appear one at a time.
    Takes no arguments and returns ``None``.
    """
    n_bins = 20

    for sim_path in ('Models/KQK/TDL_BAD/sim', 'Models/KQK/TDS_BAD/sim'):
        _, evaldict, _ = _kqk_load_pickle(sim_path)
        _kqk_print_sim_summary(evaldict)

    leaf_meta = _kqk_load_pickle('Models/KQK/TDL_BAD/meta')
    stem_meta = _kqk_load_pickle('Models/KQK/TDS_BAD/meta')
    _kqk_print_meta_summary(stem_meta, leaf_meta)

    import tablebases

    leaf_meta = _kqk_load_pickle('Models/KQK/TDL/meta')
    stem_meta = _kqk_load_pickle('Models/KQK/TDS/meta')

    # Statement order kept exactly as in the original: the data set is
    # loaded before the 'PL' / 'USE_DSET' parameters are set.
    settings.init()
    load_DS('dataset/kqk_fics.epd')
    settings.params['PL'] = 'KQkq'
    settings.params['USE_DSET'] = True

    leaf_episodes, leaf_rate = _kqk_learning_curve(leaf_meta)
    stem_episodes, stem_rate = _kqk_learning_curve(stem_meta)
    stem_stages = [entry[0] for entry in stem_meta['lambda']]
    leaf_stages = [entry[0] for entry in leaf_meta['lambda']]
    print(leaf_meta['lambda'])

    plt.figure(1)
    plt.subplot(111)
    line_stem, = plt.plot(stem_episodes, stem_rate,
                          label=r'TD-Stem$(\lambda)$')
    line_leaf, = plt.plot(leaf_episodes, leaf_rate,
                          label=r'TD-Leaf$(\lambda)$')
    for position in stem_stages:
        plt.axvline(x=position, color='#99ccff')
    for position in leaf_stages:
        plt.axvline(x=position, color='#ffc266')
    plt.xlabel(r'$N$')
    plt.ylabel('winning rate')
    plt.legend(handles=[line_leaf, line_stem])
    plt.xlim(0, max(max(leaf_episodes), max(stem_episodes)))
    plt.ylim(0, 1)
    plt.show()

    _kqk_print_meta_summary(stem_meta, leaf_meta)

    # Only the stem evaluation printed the minimum DTM values originally.
    stem = _kqk_evaluate('Models/KQK/TDS/sim2', 'Models/KQK/TDS/network',
                         tablebases.probe_result, n_bins,
                         report_min_dtm=True)
    leaf = _kqk_evaluate('Models/KQK/TDL/sim2', 'Models/KQK/TDL/network',
                         tablebases.probe_result, n_bins)

    x = np.array(range(1, n_bins + 1))
    _kqk_bar_figure(2, x, stem['wc'], leaf['wc'], 'WCR')
    _kqk_bar_figure(3, x, stem['we'], leaf['we'], 'WE')
    _kqk_bar_figure(4, x, stem['lhs'], leaf['lhs'], 'LHS')
    _kqk_value_figure(x, stem, leaf)
def _krk_load_pickle(path):
    """Return the object unpickled from the file at ``path``.

    NOTE(review): unpickling can execute arbitrary code.  The paths used
    here look like locally produced artefacts; never point this at
    untrusted files.
    """
    with open(path, 'rb') as handle:
        return cp.load(handle)


def _krk_print_row(*values):
    """Print ``values`` on one line, separated by single spaces."""
    print(' '.join(str(value) for value in values))


def _krk_learning_curve(wins, n_long, n_mid):
    """Return ``(episodes, rate)`` for one learning curve.

    Entry ``i`` of ``wins`` is divided by 5000 while ``i < n_long``, by 500
    while ``i < n_mid`` and by 250 afterwards.  ``episodes`` is the running
    total of those sizes with a leading 0, and ``rate`` gets a matching
    leading 0 so the curve starts at the origin.
    """
    sizes = []
    for index in range(len(wins)):
        if index < n_long:
            sizes.append(5000)
        elif index < n_mid:
            sizes.append(500)
        else:
            sizes.append(250)
    episodes = [0]
    for size in sizes:
        episodes.append(episodes[-1] + size)
    # The prepended point is 0 wins; its denominator is 1 to avoid 0/0.
    denominators = np.array([1] + [float(size) for size in sizes])
    rate = np.array([0] + wins) / denominators
    return episodes, rate


def _krk_bin_means(records, score_fn, n_bins):
    """Return the mean of ``score_fn(rec)`` per bin ``rec[1] == 1..n_bins``.

    Records for which ``score_fn`` returns ``None`` are ignored.  A bin with
    no records yields NaN (``np.mean`` of an empty array).
    """
    scored = []
    for rec in records:
        score = score_fn(rec)  # evaluated once per record, reused per bin
        if score is not None:
            scored.append((rec[1], score))
    return [np.mean(np.array([s for d, s in scored if d == depth]))
            for depth in range(1, n_bins + 1)]


def _krk_evaluate(sim_path, model_fn, n_bins):
    """Load one simulation file and compute all per-DTM statistics.

    The file must unpickle to a 3-tuple ``(records, evaldict, _)``.  For each
    record, ``rec[-1]`` is passed to ``Approximator.V``, ``rec[1]`` is the
    bin index (plotted as 'DTM') and only entries with ``rec[0] == 1``
    contribute to the value statistics.

    Bin ``k`` (1-based) always collects ``rec[1] == k``, matching the x axis
    ``1..n_bins`` used by the plots.

    Returns a dict with the binned 'wc', 'we', 'lhs' means plus 'v_mean' and
    'v_std' lists.
    """
    records, evaldict, _ = _krk_load_pickle(sim_path)
    _krk_print_row(*[np.mean(np.array(evaldict[key]))
                     for key in ('wc', 'we', 'lhs')])

    dtm = [rec[1] for rec in records]
    wdl = [rec[0] for rec in records]
    values = Approximator.V([rec[-1] for rec in records], model_fn)

    v_mean, v_std = [], []
    for depth in range(1, n_bins + 1):
        selected = np.array([values[j] for j in range(values.shape[0])
                             if dtm[j] == depth and wdl[j] == 1])
        v_mean.append(np.mean(selected))
        v_std.append(np.std(selected))

    return {
        'wc': _krk_bin_means(records, wc, n_bins),
        'we': _krk_bin_means(records, we, n_bins),
        'lhs': _krk_bin_means(records, lhs, n_bins),
        'v_mean': v_mean, 'v_std': v_std,
    }


def _krk_bar_figure(number, x, stem_values, leaf_values, ylabel):
    """Show figure ``number``: stem and leaf bars side by side per DTM."""
    plt.figure(number)
    plt.subplot(111)
    stem_bars = plt.bar(x - 1. / 6, stem_values, width=1. / 3,
                        align='center', label=r'TD-Stem$(\lambda)$ ')
    leaf_bars = plt.bar(x + 1. / 6, leaf_values, width=1. / 3,
                        align='center', label=r'TD-Leaf$(\lambda)$ ')
    plt.legend(handles=[stem_bars, leaf_bars])
    plt.xlabel('DTM')
    plt.ylabel(ylabel)
    plt.show()


def _krk_plot_band(x, mean, std, color):
    """Draw the two curves ``mean + 2*std`` and ``mean - 2*std``."""
    mean = np.array(mean)
    std = np.array(std)
    plt.plot(x, mean + 2 * std, color=color)
    plt.plot(x, mean - 2 * std, color=color)


def comparison_stem_leaf():
    """Compare the TD-Stem and TD-Leaf KRK models with printed stats and plots.

    Steps, in order:

    1. Initialise ``settings`` and load the KRK data set.
    2. Load both meta files, print their keys and a summary line (mean
       'mps', 'episodes', 'elapsed_time' for stem then leaf).
    3. Show the learning curves (figure 1) with the hard-coded vertical
       marker positions of each run.
    4. Evaluate both ``sim`` files and show per-DTM bar charts of the
       'wc', 'we' and 'lhs' scores (figures 2-4) and the mean network value
       with +/- 2 std bands (figure 5).

    Stem and leaf statistics use the same DTM binning (bin ``k`` holds
    ``DTM == k``), so bars at the same x position are comparable.
    Takes no arguments and returns ``None``.
    """
    n_bins = 33
    stem_dir = 'Models/stem_leaf/TDStem/TDStem_stem_or_leaf_7__28_06/'
    leaf_dir = 'Models/stem_leaf/TDLeaf/TDLeaf_stem_or_leaf_7__03_07/'

    settings.init()
    settings.params['USE_DSET'] = True
    settings.params['PL'] = 'KRkr'
    load_DS('dataset/krk.epd')

    leaf_meta = _krk_load_pickle(leaf_dir + 'stem_or_leaf_7_meta.sv')
    stem_meta = _krk_load_pickle(stem_dir + 'stem_or_leaf_7_meta.sv')
    print(leaf_meta.keys())
    print(stem_meta.keys())

    _krk_print_row(np.mean(np.array(stem_meta['mps'])),
                   np.mean(np.array(leaf_meta['mps'])),
                   stem_meta['episodes'], leaf_meta['episodes'],
                   stem_meta['elapsed_time'], leaf_meta['elapsed_time'])

    # Hard-coded index thresholds at which the divisor changes, per run.
    stem_episodes, stem_rate = _krk_learning_curve(stem_meta['w_list'],
                                                   53, 73)
    leaf_episodes, leaf_rate = _krk_learning_curve(leaf_meta['w_list'],
                                                   63, 83)

    plt.figure(1)
    plt.subplot(111)
    line_stem, = plt.plot(stem_episodes, stem_rate,
                          label=r'TD-Stem$(\lambda)$')
    for position in [100000, 170000, 264000, 275000, 283200, 291000]:
        plt.axvline(x=position, color='#99ccff')
    line_leaf, = plt.plot(leaf_episodes, leaf_rate,
                          label=r'TD-Leaf$(\lambda)$')
    for position in [120000, 220000, 315000, 325250, 333000, 341000]:
        plt.axvline(x=position, color='#ffc266')
    plt.xlabel(r'$N$')
    plt.ylabel('winning rate')
    plt.legend(handles=[line_leaf, line_stem])
    # Cover whichever curve is longer so neither is clipped.
    plt.xlim(0, max(max(leaf_episodes), max(stem_episodes)))
    plt.ylim(0, 1)
    plt.show()

    stem = _krk_evaluate(
        'Models/stem_leaf/TDStem/sim',
        stem_dir + 'TDStem_stem_or_leaf_7__28_06-1_23116-0', n_bins)
    leaf = _krk_evaluate(
        'Models/stem_leaf/TDLeaf/sim',
        leaf_dir + 'TDLeaf_stem_or_leaf_7__03_07-1_13299-0', n_bins)

    x = np.array(range(1, n_bins + 1))
    _krk_bar_figure(2, x, stem['wc'], leaf['wc'], 'WCR')
    _krk_bar_figure(3, x, stem['we'], leaf['we'], 'WE')
    _krk_bar_figure(4, x, stem['lhs'], leaf['lhs'], 'LHS')

    plt.figure(5)
    plt.subplot(111)
    stem_line, = plt.plot(x, stem['v_mean'], label=r'TD-Stem$(\lambda)$ ')
    leaf_line, = plt.plot(x, leaf['v_mean'], label=r'TD-Leaf$(\lambda)$ ')
    _krk_plot_band(x, stem['v_mean'], stem['v_std'], '#99ccff')
    _krk_plot_band(x, leaf['v_mean'], leaf['v_std'], '#ffc266')
    plt.legend(handles=[stem_line, leaf_line])
    plt.xlabel('DTM')
    plt.ylabel(r'$V$')
    plt.show()