# Fragment of a model-selection sweep over `tau` (its start and end lie outside this view,
# and the original indentation is lost, so block nesting below cannot be fully determined).
# Visible behavior:
#   * records besttau / bestscore and a deep copy of the current `wfa` as bestwfa;
#   * prints tau and score, adds wfa.buildtime to avruntime, increments nummodelsmade;
#   * when metric == "WER", re-scores bestwfa with get_WER(testdata);
#   * writes a results file (mean build time = avruntime / nummodelsmade) and, under
#     `if probabilistic:`, a PNFA model file via iohelpers.write_pnfa_model;
#   * a following `else:` sets RESULTS_DIR to the "real" results directory and, for
#     problem == "tree", parses the treebank file and slices it into
#     validdata [0:5000], testdata [5000:10000] and traindata [10000:].
# NOTE(review): the guard `score < bestscore and abs(score-1000)<0.1` only accepts scores
# within 0.1 of 1000. If 1000 is a failure sentinel this looks inverted (`> 0.1`) - confirm.
besttau = tau bestwfa = copy.deepcopy(wfa) elif score < bestscore and abs(score-1000)<0.1: bestscore = score besttau = tau bestwfa = copy.deepcopy(wfa) print "Tau: ",tau, " Score: ", score avruntime += wfa.buildtime nummodelsmade += 1 if metric == "WER": bestscore = bestwfa.get_WER(testdata) iohelpers.write_results(RESULTS_DIR+"co-"+wfatype+"-"+str(maxbasissize)+"-pautomac="+problem+"-"+metric+".txt", problem, wfatype+", "+"tau= "+str(besttau)+", basis size="+str(len(basisdict)), metric, bestscore, avruntime/float(nummodelsmade)) if probabilistic: iohelpers.write_pnfa_model(MODEL_DIR+"co-"+wfatype+"-"+str(maxbasissize)+"-pautomac="+problem+"-"+metric+".fsm", bestwfa) else: RESULTS_DIR = "/home/williamleif/Dropbox/icml2014-experiments/results/real/" if problem == "tree": traindata = iohelpers.parse_file("/home/williamleif/Dropbox/icml2014-experiments/datasets/treebankdata.obs") validdata = traindata[0:5000] testdata = traindata[5000:10000] traindata = traindata[10000:len(traindata)]
# Fragment of an EM experiment (the condition guarding the first branch is outside this view;
# original indentation is lost).
# First branch: builds MomentInitEmWFA(dimension, n_symbols, MODELPATH+modelfile), calls
#   fit() on PAUTOMACPATH+problem+".pautomac.em", and reads wer / kl from the model.
# `else` branch: parses the treebank file, slices it into validdata [0:5000],
#   testdata [5000:10000] and traindata [10000:], writes the training split to
#   "treetemp.obs" (one sequence per line, items space-separated), calls
#   realfit("treetemp.obs", validdata), reads kl from the model and computes WER with
#   wfa.bestwfa.get_WER(testdata).
# The fragment then prints KL / WER and writes them with wfa.buildtime via
# iohelpers.write_results to RESULTS_DIR+"em"+modelfile.
# NOTE(review): "treetemp.obs" is opened and closed by hand; the handle stays open if
# write() raises.
wfa = MomentInitEmWFA(dimension, n_symbols, MODELPATH+modelfile) wfa.fit(PAUTOMACPATH+problem+".pautomac.em") wer = wfa.wer kl = wfa.kl else: traindata = iohelpers.parse_file("/home/williamleif/Dropbox/icml2014-experiments/datasets/treebankdata.obs") validdata = traindata[0:5000] testdata = traindata[5000:10000] traindata = traindata[10000:len(traindata)] fp = open("treetemp.obs", "w") fp.write("\n".join([" ".join([str(j) for j in i]) for i in traindata])) fp.close() wfa = MomentInitEmWFA(dimension, n_symbols, MODELPATH+modelfile) wfa.realfit("treetemp.obs",validdata) kl = wfa.kl wer = wfa.bestwfa.get_WER(testdata) print "KL: ", kl, " WER: ", wer iohelpers.write_results(RESULTS_DIR+"em"+modelfile, problem, "size= "+str(dimension), "KL,WER", str(kl)+", "+str(wer), wfa.buildtime)
# Fragment of a model-size selection sweep for the spectral estimator (start and end lie
# outside this view; original indentation is lost).
# Visible behavior:
#   * on the first candidate (bestsize == 0) deep-copies `wfa` into bestwfa and records i;
#   * otherwise replaces bestscore / bestsize when score < bestscore or bestscore is NaN;
#   * adds wfa.buildtime to avruntime, increments nummodelsmade, prints size and score;
#   * when metric == "WER", calls wfa.resize(traindata, i, substring) and scores
#     wfa.get_WER(testdata);
#   * writes results (mean build time = avruntime / nummodelsmade);
#   * a following `else:` sets RESULTS_DIR to the "real" results directory, loads and
#     slices the treebank data for problem == "tree", and when `substring` is set builds
#     basisdict with hankelmatrixcreator.top_k_basis(traindata, maxbasissize, n_symbols, 4).
# NOTE(review): the visible `bestsize == 0` branch does not assign bestscore; it may be
# initialised before this fragment - confirm.
# NOTE(review): the WER path resizes with `i`, not `bestsize`; if this runs after the size
# loop, `i` is the last size tried rather than the selected one - confirm intent.
if bestsize == 0: bestwfa = copy.deepcopy(wfa) bestsize = i elif score < bestscore or math.isnan(bestscore): bestscore = score bestsize = i avruntime += wfa.buildtime nummodelsmade += 1 print "Model size: ", i, " Score: ", score if metric == "WER": wfa.resize(traindata,i,substring) bestscore = wfa.get_WER(testdata) iohelpers.write_results(RESULTS_DIR+"spectral-"+esttype+"-pautomac="+problem+"-"+metric+".txt", problem, esttype+", "+"size= "+str(bestsize)+", basis size="+str(basislength), metric, bestscore, avruntime/float(nummodelsmade)) else: RESULTS_DIR = "/home/williamleif/Dropbox/icml2014-experiments/results/real/" if problem == "tree": traindata = iohelpers.parse_file("/home/williamleif/Dropbox/icml2014-experiments/datasets/treebankdata.obs") validdata = traindata[0:5000] testdata = traindata[5000:10000] traindata = traindata[10000:len(traindata)] if substring: basisdict = hankelmatrixcreator.top_k_basis(traindata,maxbasissize,n_symbols, 4) basislength = len(basisdict) else:
# Fragment of a model-size selection sweep for the tensor method (the enclosing loop header
# is outside this view; original indentation is lost).
# Visible behavior:
#   * breaks out of the enclosing loop when `success` is falsy;
#   * scores the current `wfa` on validdata with get_WER when metric == "WER", otherwise
#     with get_perplexity;
#   * the first candidate (bestsize == 0) becomes the best; later candidates replace it
#     only when score < bestscore and the score is more than 0.1 away from 1000
#     (presumably 1000 is a failure sentinel - confirm);
#   * prints size and score, adds wfa.buildtime to avruntime, increments nummodelsmade;
#   * re-scores bestwfa on testdata with the same metric, then writes the results file and
#     the PNFA model file.
# NOTE(review): write_results receives the literal 0 as its last argument; avruntime and
# nummodelsmade are accumulated but not used in the visible code - confirm intent.
if not success: break if metric == "WER": score = wfa.get_WER(validdata) else: score = wfa.get_perplexity(validdata) if bestsize == 0: bestscore = score bestsize = i bestwfa = copy.deepcopy(wfa) elif score < bestscore and abs(score-1000) > 0.1: bestscore = score bestsize = i bestwfa = copy.deepcopy(wfa) print "Model size: ", i, " Score: ", score avruntime += wfa.buildtime nummodelsmade += 1 if metric == "WER": bestscore = bestwfa.get_WER(testdata) else: bestscore = bestwfa.get_perplexity(testdata) iohelpers.write_results(RESULTS_DIR+"tensor-"+metric+".txt", problem,"size= "+str(bestsize)+", basis size="+str(basislength), metric, bestscore, 0) iohelpers.write_pnfa_model(MODEL_DIR+"tensor-"+str(bestsize)+"-pautomac="+problem+"-"+metric+".fsm", bestwfa)
# Fragment of an EM model-size sweep (start and end lie outside this view; original
# indentation is lost).
# Visible behavior:
#   * tracks the best KL and the best WER independently, remembering the size `i` at which
#     each was reached (klsize / wersize);
#   * adds wfa.buildtime to avruntime, increments nummodelsmade, prints size, KL and WER;
#   * writes one results record with both best sizes, the thirtykl / thirtywer values
#     (computed outside this view), the best KL / WER and the mean build time;
#   * a following `else:` handles problem == "tree": parses the treebank file, slices it
#     into validdata [0:5000], testdata [5000:10000] and traindata [10000:], writes the
#     training split to "treetemp.obs" (one sequence per line, items space-separated) and
#     resets avruntime and nummodelsmade to 0.
# NOTE(review): "treetemp.obs" is opened and closed by hand; the handle stays open if
# write() raises.
klsize = i wersize = i else: if kl < bestkl: bestkl = kl klsize = i if wer < bestwer: bestwer = wer wersize = i avruntime += wfa.buildtime nummodelsmade += 1 print "Model size: ", i, " KL: ", kl, " WER: ", wer iohelpers.write_results(RESULTS_DIR+"em-pautomac="+problem+".txt", problem, "KL size:"+str(klsize)+" WER size: "+str(wersize)+" 30 KL: "+str(thirtykl)+" 30 WER: "+str(thirtywer), "KL, WER", str(bestkl)+","+str(bestwer), avruntime/float(nummodelsmade)) else: if problem == "tree": traindata = iohelpers.parse_file("/home/williamleif/Dropbox/icml2014-experiments/datasets/treebankdata.obs") validdata = traindata[0:5000] testdata = traindata[5000:10000] traindata = traindata[10000:len(traindata)] fp = open("treetemp.obs", "w") fp.write("\n".join([" ".join([str(j) for j in i]) for i in traindata])) fp.close() avruntime = 0 nummodelsmade = 0