def simulate_posterior(region, params, dates, initial, N = 1000, weekly = False, parI = (1,1), parR = (1,1), parD = (1,1), random_params = False):
    """Simulate from the HMM model.

    Draws N trajectories of the latent compartments (transition model) and of
    the observed counts (emission model), then denormalizes the I/R/D
    probabilities by the (cumulative) test counts.

    Args:
        region (str): Region for the data.
        params (pandas.DataFrame): Optimized parameters with `start`/`end` columns.
        dates (tuple (2)): Date range of the data.
        initial (dict): Initial values in dict with keys S,E,I,R,D.
        N (int, optional): Number of samples. By default 1000.
        weekly (bool, optional): Weekly time step if True, otherwise daily.
        parI (tuple (2)): Prior parameters for emission model I. By default (1,1).
        parR (tuple (2)): Prior parameters for emission model R. By default (1,1).
        parD (tuple (2)): Prior parameters for emission model D. By default (1,1).
        random_params (bool, optional): Bayesian parameters if True, otherwise single point.

    Returns:
        tuple: ((sim_lat, sim_obs), last_values) with sim_lat/sim_obs of shape
        (5, N, T) ordered as S,E,I,R,D, and last_values the mean latent state
        at the final time step (taken before denormalization).
    """
    x = _posterior_data(region, dates, weekly=weekly)\
        .reset_index(drop = True)
    POP = population.get_population(region)
    # Restrict parameters to the simulated range. Work on a copy so the .loc
    # assignment below neither mutates the caller's frame nor triggers a
    # SettingWithCopy warning on the filtered slice.
    params = params[params.start <= dates[1]].copy()
    params.loc[params.end > dates[1], 'end'] = dates[1]
    # One dry run of the transition model just to size the merged time axis.
    latent = transition(POP, initial, params, random_params=random_params)
    xx = x.merge(latent, how='left', on=['date'])
    Dw = xx.shape[0]
    sim_lat = np.zeros((5,N,Dw))
    sim_obs = np.zeros((5,N,Dw))
    for i in range(N):
        if i == 0 or (i+1) % 100 == 0:
            print('%4d / %d' % (i+1,N))
        # transition
        latent = transition(POP, initial, params, random_params=random_params)
        # Clip negative infection counts. The original chained-indexing form
        # latent[latent.I < 0]['I'] = 0 wrote to a temporary copy (no-op).
        latent.loc[latent.I < 0, 'I'] = 0
        xx = x.merge(latent, how='left', on=['date'])
        # Guard against non-positive test counts before using them as denominators.
        xx.tests = xx['tests'].apply(lambda t: t if t >= 0 else 1)
        sim_lat[:,i,:] = xx[['S','E','I','R','D']].to_numpy().T
        # emission
        try:
            sim_obs[2,i,:] = emission(np.abs(xx.I.to_numpy()), xx.tests.to_numpy(), *parI)
        except Exception:
            # Dump the offending inputs for debugging, then propagate.
            print(xx.I)
            print(xx.tests)
            raise
        sim_obs[3,i,:] = emission(xx.R.to_numpy(), xx.cumtests.to_numpy(), *parR)
        sim_obs[4,i,:] = emission(xx.D.to_numpy(), xx.cumtests.to_numpy(), *parD)
    # spare last
    last_values = sim_lat[:,:,-1].mean(axis = 1)
    # denormalize probability
    sim_lat[1:3,:,:] = sim_lat[1:3,:,:] * x.tests.to_numpy()
    sim_lat[3:5,:,:] = sim_lat[3:5,:,:] * x.cumtests.to_numpy()
    sim_obs[1:3,:,:] = sim_obs[1:3,:,:] * x.tests.to_numpy()
    sim_obs[3:5,:,:] = sim_obs[3:5,:,:] * x.cumtests.to_numpy()
    return (sim_lat, sim_obs), last_values
def main(): tool.preprocess('../data/POS/train', '../data/POS/ptrain') tool.preprocess('../data/NPC/train', '../data/NPC/ptrain') e0 = em.emission() bt0 = bitr.bi_transition() tt0 = tritr.tri_transition() # print "without preprocessor" # e0.compute('../data/POS/train') # t0.compute('../data/POS/train') # e0.predict('../data/POS/dev.in','../data/POS/dev.p2.out',p=False) # print "POS,MLE:", tool.evaluate('../data/POS/dev.p2.out','../data/POS/dev.out') # print "POS,MLE likelihood:", e0.filelikelihood("../data/POS/dev.p2.out",p=False) # viterbi_best(e0,t0,'../data/POS/dev.in','../data/POS/dev.p3.out',p=False) # print "POS,DP:", tool.evaluate('../data/POS/dev.p3.out','../data/POS/dev.out') # print "POS,DP likelihood:", e0.filelikelihood("../data/POS/dev.p3.out", p=False) # start = time.clock() # viterbi_Nbest(e0, t0, '../data/POS/dev.in', '../data/POS/dev.p4.out', best=1, p=False) # print "runtime:",time.clock()-start # c = 1 # while c<=1: # print c,":POS, DP2:", tool.evaluate('../data/POS/dev.p4.out', '../data/POS/dev.out',col=c) # print c,":POS, DP2 likelihood:", e0.filelikelihood("../data/POS/dev.p4.out",p=False, col=c) # c+=1 print "with preprocessor" e0.compute('../data/POS/ptrain') bt0.compute('../data/POS/ptrain') tt0.compute('../data/POS/ptrain') # e0.predict('../data/POS/test.in','../data/POS/test.p1.out') # era,eno= tool.evaluate('../data/POS/dev.p2.out','../data/POS/dev.out',col=1,pr=True) # print "error rate:",era # print "POS, MLE, likelihood:",e0.filelikelihood("../data/POS/dev.p2.out") # with new smoothing 0.27637 # viterbi_best(e0,bt0,'../data/POS/dev.in','../data/POS/dev.p2.out') # era,eno = tool.evaluate('../data/POS/dev.p2.out','../data/POS/dev.out',pr=True) # print "POS, DP:", era # print "POS, DP likelihood:", e0.filelikelihood("../data/POS/dev.p3.out") # start = time.clock() # 0.5 1.5 0: 0.2574 # 1 10 1: 0.2422 # 1 15 1: 0.2422 # 1 20 1: 0.239 # 1 25 1: 0.2369 # 1 30 1: 0.235 # 1 35 1: 0.2334 # viterbi_Nbest(e0, bt0, tt0, '../data/POS/dev.in', 
'../data/POS/dev.p5.out',lambda0=1.0, lambda1=30.0, lambda2=1.0, best=1) # print "runtime:",time.clock() - start c = 1 while c <= 1: era, eno = tool.evaluate('../data/POS/dev.p5.out', '../data/POS/dev.out', col=c, pr=True) print c, ":POS, DP2:", era # print c,":POS, DP2 likelihood:", e0.filelikelihood("../data/POS/dev.p4.out",col=c) c += 1
def main(): tool.preprocess('../data/POS/train', '../data/POS/ptrain') tool.preprocess('../data/NPC/train', '../data/NPC/ptrain') e0 = em.emission() bt0 = bitr.bi_transition() tt0 = tritr.tri_transition() # print "without preprocessor" # e0.compute('../data/POS/train') # t0.compute('../data/POS/train') # e0.predict('../data/POS/dev.in','../data/POS/dev.p2.out',p=False) # print "POS,MLE:", tool.evaluate('../data/POS/dev.p2.out','../data/POS/dev.out') # print "POS,MLE likelihood:", e0.filelikelihood("../data/POS/dev.p2.out",p=False) # viterbi_best(e0,t0,'../data/POS/dev.in','../data/POS/dev.p3.out',p=False) # print "POS,DP:", tool.evaluate('../data/POS/dev.p3.out','../data/POS/dev.out') # print "POS,DP likelihood:", e0.filelikelihood("../data/POS/dev.p3.out", p=False) # start = time.clock() # viterbi_Nbest(e0, t0, '../data/POS/dev.in', '../data/POS/dev.p4.out', best=1, p=False) # print "runtime:",time.clock()-start # c = 1 # while c<=1: # print c,":POS, DP2:", tool.evaluate('../data/POS/dev.p4.out', '../data/POS/dev.out',col=c) # print c,":POS, DP2 likelihood:", e0.filelikelihood("../data/POS/dev.p4.out",p=False, col=c) # c+=1 print "with preprocessor" e0.compute('../data/POS/ptrain') bt0.compute('../data/POS/ptrain') tt0.compute('../data/POS/ptrain') # e0.predict('../data/POS/test.in','../data/POS/test.p1.out') # era,eno= tool.evaluate('../data/POS/dev.p2.out','../data/POS/dev.out',col=1,pr=True) # print "error rate:",era # print "POS, MLE, likelihood:",e0.filelikelihood("../data/POS/dev.p2.out") # with new smoothing 0.27637 # viterbi_best(e0,bt0,'../data/POS/dev.in','../data/POS/dev.p2.out') # era,eno = tool.evaluate('../data/POS/dev.p2.out','../data/POS/dev.out',pr=True) # print "POS, DP:", era # print "POS, DP likelihood:", e0.filelikelihood("../data/POS/dev.p3.out") # start = time.clock() # 0.5 1.5 0: 0.2574 # 1 10 1: 0.2422 # 1 15 1: 0.2422 # 1 20 1: 0.239 # 1 25 1: 0.2369 # 1 30 1: 0.235 # 1 35 1: 0.2334 # viterbi_Nbest(e0, bt0, tt0, '../data/POS/dev.in', 
'../data/POS/dev.p5.out',lambda0=1.0, lambda1=30.0, lambda2=1.0, best=1) # print "runtime:",time.clock() - start c = 1 while c <= 1: era, eno = tool.evaluate('../data/POS/dev.p5.out', '../data/POS/dev.out',col=c,pr=True) print c,":POS, DP2:",era # print c,":POS, DP2 likelihood:", e0.filelikelihood("../data/POS/dev.p4.out",col=c) c += 1
def main(): tool.preprocess('../data/POS/train', '../data/POS/ptrain') tool.preprocess('../data/NPC/train', '../data/NPC/ptrain') e0 = em.emission() t0 = tr.transition() print "without preprocessor" e0.compute('../data/POS/train') t0.compute('../data/POS/train') e0.predict('../data/POS/dev.in','../data/POS/dev.p2.out',p=False) print "POS,MLE:", tool.evaluate('../data/POS/dev.p2.out','../data/POS/dev.out') viterbi_best(e0,t0,'../data/POS/dev.in','../data/POS/dev.p3.out',p=False) print "POS,DP:", tool.evaluate('../data/POS/dev.p3.out','../data/POS/dev.out')
def main(): tool.preprocess('../data/POS/train', '../data/POS/ptrain') tool.preprocess('../data/NPC/train', '../data/NPC/ptrain') e0 = em.emission() t0 = tr.transition() print "without preprocessor" e0.compute('../data/POS/train') t0.compute('../data/POS/train') e0.predict('../data/POS/dev.in', '../data/POS/dev.p2.out', p=False) print "POS,MLE:", tool.evaluate('../data/POS/dev.p2.out', '../data/POS/dev.out') viterbi_best(e0, t0, '../data/POS/dev.in', '../data/POS/dev.p3.out', p=False) print "POS,DP:", tool.evaluate('../data/POS/dev.p3.out', '../data/POS/dev.out')
)  # closes a parser.add_argument(...) call whose opening lies before this chunk
# Remaining CLI options for the tagger driver.
parser.add_argument('-b', dest='best', type=int, choices=[1, 10], default=1, help='number of best tags to generate')
# NOTE(review): argparse type=bool converts any non-empty string to True, so
# "-p False" still yields True — confirm this is the intended behavior.
parser.add_argument('-p', dest='process', type=bool, default=True, help='whether do process or not')
args = parser.parse_args()
print args
# Dispatch on the selected algorithm.
if args.algorithm == 0:
    # Emission-only MLE tagging.
    e = em.emission()
    e.compute(args.trainfile)
    e.predict(args.infile, args.outfile, args.process)
    # print tool.evaluate('../data/POS/dev.out',args.outfile,col=1)
elif args.algorithm == 1:
    # Viterbi decoding supports only the single best sequence.
    if args.best != 1:
        print "Error: best must be 1 with algorithm 1"
        exit(0)
    #run original version of viterbi
    e = em.emission()
    e.compute(args.trainfile)
    t = tr.transition()
    t.compute(args.trainfile)
    viterbi.viterbi_best(e, t, args.infile, args.outfile, args.process)
    # print tool.evaluate('../data/POS/dev.out',args.outfile,col=1)
import toolbox as tool import viterbi as viterbi import argparse parser = argparse.ArgumentParser(description="twitter POS tagger implemented with hidden markov model") parser.add_argument("-t",dest='trainfile',required=True,help='path of training file') parser.add_argument("-i",dest='infile',required=True,help='path of input file') parser.add_argument("-o",dest='outfile',required=True,help='path of output file') parser.add_argument("--algorithm",dest="algorithm",type=int,choices=[0,1,2],required=True,help="0:MLE with emission probability\n1:viterbi_best,top 1 sequence\n2:viterbi_Nbest,top 1 or 10 best sequence") parser.add_argument('-b',dest='best',type=int,choices=[1,10],default=1,help='number of best tags to generate') parser.add_argument('-p',dest='process',type=bool,default=True,help='whether do process or not') args = parser.parse_args() print args if args.algorithm==0: e = em.emission() e.compute(args.trainfile) e.predict(args.infile,args.outfile,args.process) # print tool.evaluate('../data/POS/dev.out',args.outfile,col=1) elif args.algorithm==1: if args.best != 1: print "Error: best must be 1 with algorithm 1" exit(0) #run original version of viterbi e = em.emission() e.compute(args.trainfile) t = tr.transition() t.compute(args.trainfile) viterbi.viterbi_best(e,t,args.infile,args.outfile,args.process) # print tool.evaluate('../data/POS/dev.out',args.outfile,col=1)