Ejemplo n.º 1
0
                besttau = tau
                bestwfa = copy.deepcopy(wfa)
            # A later candidate replaces the incumbent only when it scores
            # strictly lower AND its score lies within 0.1 of 1000.
            # NOTE(review): a parallel model-selection branch elsewhere in this
            # source tests `abs(score-1000) > 0.1` instead -- confirm which
            # comparison is intended here.
            elif score < bestscore and abs(score-1000)<0.1:
                bestscore = score
                besttau = tau
                # Keep a deep copy of the current model as the best so far.
                bestwfa = copy.deepcopy(wfa)

            print "Tau: ",tau, " Score: ", score

            # Running totals; their ratio is what gets reported later as the
            # average build time per model.
            avruntime += wfa.buildtime
            nummodelsmade += 1

        # For the "WER" metric the reported score is recomputed from the best
        # model on `testdata`; for any other metric `bestscore` keeps whatever
        # value it was given earlier.
        if metric == "WER":
            bestscore = bestwfa.get_WER(testdata)

        # Hand the result to write_results. The output file name encodes the
        # model type, max basis size, problem and metric; the last argument is
        # the accumulated build time divided by the number of models built.
        iohelpers.write_results(RESULTS_DIR+"co-"+wfatype+"-"+str(maxbasissize)+"-pautomac="+problem+"-"+metric+".txt", problem, wfatype+", "+"tau= "+str(besttau)+", basis size="+str(len(basisdict)), metric, bestscore, avruntime/float(nummodelsmade))

        # The best model itself is written out only when `probabilistic` is
        # truthy.
        if probabilistic:
            iohelpers.write_pnfa_model(MODEL_DIR+"co-"+wfatype+"-"+str(maxbasissize)+"-pautomac="+problem+"-"+metric+".fsm", bestwfa)


    else:
        

        # Results produced on this branch go under the `results/real/`
        # directory.
        RESULTS_DIR = "/home/williamleif/Dropbox/icml2014-experiments/results/real/"

        # Split of the parsed treebank file: the first 5000 entries become
        # validation data, the next 5000 test data, and everything from index
        # 10000 onward is kept as training data.
        # NOTE(review): nothing visible here assigns these names for any other
        # value of `problem` -- confirm only "tree" reaches this path.
        if problem == "tree":
            traindata = iohelpers.parse_file("/home/williamleif/Dropbox/icml2014-experiments/datasets/treebankdata.obs")
            validdata = traindata[0:5000]
            testdata = traindata[5000:10000]
            traindata = traindata[10000:len(traindata)]
		# Build the EM model from the requested dimension, alphabet size and
		# model file path, then fit it on the problem's ".pautomac.em" file.
		wfa = MomentInitEmWFA(dimension, n_symbols, MODELPATH+modelfile)

		wfa.fit(PAUTOMACPATH+problem+".pautomac.em")

		# Read both scores off the model object.
		# NOTE(review): presumably these attributes are populated by fit() --
		# verify against MomentInitEmWFA.
		wer = wfa.wer
		kl = wfa.kl

	else:

		# Split of the parsed treebank file: the first 5000 entries become
		# validation data, the next 5000 test data, and everything from index
		# 10000 onward is kept as training data.
		traindata = iohelpers.parse_file("/home/williamleif/Dropbox/icml2014-experiments/datasets/treebankdata.obs")
		validdata = traindata[0:5000]
		testdata = traindata[5000:10000]
		traindata = traindata[10000:len(traindata)]

		# Dump the training split to a scratch file (one entry per line, items
		# separated by single spaces) because realfit() below is given a path.
		# The `with` block closes the handle even if the write raises; the
		# previous open()/close() pair leaked it on error.
		# The list comprehensions are kept as-is: under Python 2 they leave
		# `i` and `j` bound afterwards, exactly as the original code did.
		with open("treetemp.obs", "w") as fp:
			fp.write("\n".join([" ".join([str(j) for j in i]) for i in traindata]))

		wfa = MomentInitEmWFA(dimension, n_symbols, MODELPATH+modelfile)

		# Fit from the scratch file, passing the validation split alongside.
		wfa.realfit("treetemp.obs",validdata)

		# KL is read off the fitted model; WER is computed on the test split
		# by the model's `bestwfa` attribute.
		kl = wfa.kl
		wer = wfa.bestwfa.get_WER(testdata)



	# Echo both scores to stdout.
	print "KL: ", kl, " WER: ", wer

	# Pass both scores (joined into one "KL, WER" string), the model dimension
	# and the model's build time to write_results; the output name is
	# RESULTS_DIR + "em" + modelfile.
	iohelpers.write_results(RESULTS_DIR+"em"+modelfile, problem, "size= "+str(dimension), "KL,WER", str(kl)+", "+str(wer), wfa.buildtime)
            # First branch: taken while `bestsize` is still 0. It snapshots the
            # current model and records the size `i`.
            # NOTE(review): this branch does not assign `bestscore`, and the
            # `elif` below updates `bestscore`/`bestsize` but never refreshes
            # `bestwfa` -- confirm both are intentional.
            if bestsize == 0:
                bestwfa = copy.deepcopy(wfa)
                bestsize = i
            # A strictly lower score wins; a NaN incumbent is always replaced.
            elif score < bestscore or math.isnan(bestscore):
                bestscore = score
                bestsize = i
            # Running totals; their ratio is reported below as the average
            # build time per model.
            avruntime += wfa.buildtime
            nummodelsmade += 1

            print "Model size: ", i, " Score: ", score

        # For the "WER" metric the model is resized and re-scored on the test
        # split; for any other metric `bestscore` is reported unchanged.
        # NOTE(review): resize() is given `i` (whatever value it holds at this
        # point), not `bestsize` -- confirm this is the intended size.
        if metric == "WER":
            wfa.resize(traindata,i,substring)
            bestscore = wfa.get_WER(testdata)

        # Hand the result to write_results; the output file name encodes the
        # estimator type, problem and metric.
        iohelpers.write_results(RESULTS_DIR+"spectral-"+esttype+"-pautomac="+problem+"-"+metric+".txt", problem, esttype+", "+"size= "+str(bestsize)+", basis size="+str(basislength), metric, bestscore, avruntime/float(nummodelsmade))

    else:

        # Results produced on this branch go under the `results/real/`
        # directory.
        RESULTS_DIR = "/home/williamleif/Dropbox/icml2014-experiments/results/real/"

        # Split of the parsed treebank file: the first 5000 entries become
        # validation data, the next 5000 test data, and everything from index
        # 10000 onward is kept as training data.
        # NOTE(review): nothing visible here assigns these names for any other
        # value of `problem` -- confirm only "tree" reaches this path.
        if problem == "tree":
            traindata = iohelpers.parse_file("/home/williamleif/Dropbox/icml2014-experiments/datasets/treebankdata.obs")
            validdata = traindata[0:5000]
            testdata = traindata[5000:10000]
            traindata = traindata[10000:len(traindata)]

        # With `substring` set, the basis comes from top_k_basis() over the
        # training split, and its length is kept for the results line.
        # NOTE(review): the meaning of the literal 4 is not visible here --
        # check top_k_basis's signature.
        if substring:
            basisdict = hankelmatrixcreator.top_k_basis(traindata,maxbasissize,n_symbols, 4)
            basislength = len(basisdict)
        else:
            if not success:
                break

            # Score the current model on the validation split: WER when asked
            # for, perplexity for every other metric value.
            if metric == "WER":
                score = wfa.get_WER(validdata)
            else:
                score = wfa.get_perplexity(validdata)

            # First branch: taken while `bestsize` is still 0; it records the
            # score, the size `i` and a deep copy of the model.
            if bestsize == 0:
                bestscore = score
                bestsize = i
                bestwfa = copy.deepcopy(wfa)
            # Later candidates must score strictly lower and must not lie
            # within 0.1 of 1000.
            # NOTE(review): presumably 1000 is a sentinel/failure score --
            # confirm against get_WER/get_perplexity.
            elif score < bestscore and abs(score-1000) > 0.1:
                bestscore = score
                bestsize = i
                bestwfa = copy.deepcopy(wfa)

            print "Model size: ", i, " Score: ", score
            # Running totals of build time and models built.
            avruntime += wfa.buildtime
            nummodelsmade += 1

        # The reported score is recomputed from the selected model on the test
        # split, using the same metric choice as above.
        if metric == "WER":
            bestscore = bestwfa.get_WER(testdata)
        else:
            bestscore = bestwfa.get_perplexity(testdata)

        # NOTE(review): the last argument to write_results is the literal 0
        # here even though `avruntime`/`nummodelsmade` are accumulated above --
        # confirm the runtime is meant to be omitted.
        iohelpers.write_results(RESULTS_DIR+"tensor-"+metric+".txt", problem,"size= "+str(bestsize)+", basis size="+str(basislength), metric, bestscore, 0)
        # Write the selected model out; the file name encodes the chosen size,
        # problem and metric.
        iohelpers.write_pnfa_model(MODEL_DIR+"tensor-"+str(bestsize)+"-pautomac="+problem+"-"+metric+".fsm", bestwfa)

Ejemplo n.º 5
0
				klsize = i
				wersize = i
			else:
				# KL and WER minima are tracked independently, each with the
				# model size `i` at which it was observed, so the two "best"
				# sizes may differ.
				if kl < bestkl:
					bestkl = kl
					klsize = i
				if wer < bestwer:
					bestwer = wer
					wersize = i

			# Running totals; their ratio is reported below as the average
			# build time per model.
			avruntime += wfa.buildtime
			nummodelsmade += 1

			print "Model size: ", i, " KL: ", kl, " WER: ", wer

		# Hand the result to write_results: both best sizes and the
		# `thirtykl`/`thirtywer` values go into the description string, and
		# the two best scores are joined into one comma-separated value.
		iohelpers.write_results(RESULTS_DIR+"em-pautomac="+problem+".txt", problem,  "KL size:"+str(klsize)+" WER size: "+str(wersize)+" 30 KL: "+str(thirtykl)+" 30 WER: "+str(thirtywer), "KL, WER", str(bestkl)+","+str(bestwer), avruntime/float(nummodelsmade))

	else:
		# Split of the parsed treebank file: the first 5000 entries become
		# validation data, the next 5000 test data, and everything from index
		# 10000 onward is kept as training data.
		# NOTE(review): nothing visible here assigns `traindata` for any other
		# value of `problem`, yet it is used unconditionally below -- confirm
		# only "tree" reaches this path.
		if problem == "tree":
			traindata = iohelpers.parse_file("/home/williamleif/Dropbox/icml2014-experiments/datasets/treebankdata.obs")
			validdata = traindata[0:5000]
			testdata = traindata[5000:10000]
			traindata = traindata[10000:len(traindata)]

		# Dump the training split to a scratch file, one entry per line with
		# items separated by single spaces. The `with` block closes the handle
		# even if the write raises; the previous open()/close() pair leaked it
		# on error.
		# The list comprehensions are kept as-is: under Python 2 they leave
		# `i` and `j` bound afterwards, exactly as the original code did.
		with open("treetemp.obs", "w") as fp:
			fp.write("\n".join([" ".join([str(j) for j in i]) for i in traindata]))

		
		avruntime = 0
		nummodelsmade = 0