コード例 #1
0
def simulate_posterior(region, params, dates, initial, N = 1000, weekly = False,
                       parI = (1,1), parR = (1,1),parD = (1,1), random_params = False):
    """Simulate from the HMM model.

    For each of the N samples a latent trajectory is drawn with `transition`,
    joined onto the observed data by date, and passed through `emission` for
    the I, R and D compartments.

    Args:
        region (str): Region for the data.
        params: Optimized parameters. The code filters it on its `start` and
            `end` columns, so it must be DataFrame-like. The caller's object
            is not modified; a filtered copy is used.
        dates (tuple (2)): Date range of the data.
        initial (dict): Initial values in dict with keys S,E,I,R,D.
        N (int): Number of samples.
        weekly (bool, optional): Weekly time step if True, otherwise daily.
        parI (tuple (2)): Prior parameters for emission model I. By default (1,1).
        parR (tuple (2)): Prior parameters for emission model R. By default (1,1).
        parD (tuple (2)): Prior parameters for emission model D. By default (1,1).
        random_params (bool, optional): Bayesian parameters if True, otherwise single point.

    Returns:
        tuple: `((sim_lat, sim_obs), last_values)` where `sim_lat` and
        `sim_obs` are arrays of shape (5, N, T) indexed by compartment
        (S, E, I, R, D), sample and time step, and `last_values` is the
        per-compartment mean over samples of the final latent time step,
        taken before the arrays are rescaled by the test counts.
        Rows 0 and 1 of `sim_obs` are never filled and stay zero.
    """
    x = _posterior_data(region, dates, weekly=weekly).reset_index(drop=True)
    POP = population.get_population(region)

    # Keep only parameter rows that start inside the date range. Take an
    # explicit copy so the `.loc` write below neither mutates the caller's
    # frame nor triggers pandas' SettingWithCopy behaviour.
    params = params[params.start <= dates[1]].copy()
    overshoot = params.end > dates[1]
    if overshoot.any():
        params.loc[overshoot, 'end'] = dates[1]

    # One probing run to learn how many time steps the merged frame has.
    latent = transition(POP, initial, params, random_params=random_params)
    n_steps = x.merge(latent, how='left', on=['date']).shape[0]

    sim_lat = np.zeros((5, N, n_steps))
    sim_obs = np.zeros((5, N, n_steps))
    for i in range(N):
        if i == 0 or (i + 1) % 100 == 0:
            print('%4d / %d' % (i + 1, N))
        # transition
        latent = transition(POP, initial, params, random_params=random_params)
        # Clip negative infected values to zero. This must be a single
        # `.loc` assignment: chained indexing (`latent[mask]['I'] = 0`)
        # writes to a temporary copy and leaves `latent` unchanged.
        latent.loc[latent.I < 0, 'I'] = 0
        xx = x.merge(latent, how='left', on=['date'])
        # Any test count that is not >= 0 (negative or NaN) is replaced by 1.
        xx['tests'] = xx['tests'].apply(lambda t: t if t >= 0 else 1)
        sim_lat[:, i, :] = xx[['S', 'E', 'I', 'R', 'D']].to_numpy().T
        # emission
        try:
            sim_obs[2, i, :] = emission(np.abs(xx.I.to_numpy()), xx.tests.to_numpy(), *parI)
        except Exception:
            # Dump the offending inputs for debugging, then propagate.
            print(xx.I)
            print(xx.tests)
            raise
        sim_obs[3, i, :] = emission(xx.R.to_numpy(), xx.cumtests.to_numpy(), *parR)
        sim_obs[4, i, :] = emission(xx.D.to_numpy(), xx.cumtests.to_numpy(), *parD)

    # spare last: keep the mean final state before rescaling
    last_values = sim_lat[:, :, -1].mean(axis=1)

    # denormalize probability: E and I are scaled by the per-step test
    # counts, R and D by the cumulative test counts (broadcast over time).
    tests = x.tests.to_numpy()
    cumtests = x.cumtests.to_numpy()
    sim_lat[1:3, :, :] = sim_lat[1:3, :, :] * tests
    sim_lat[3:5, :, :] = sim_lat[3:5, :, :] * cumtests
    sim_obs[1:3, :, :] = sim_obs[1:3, :, :] * tests
    sim_obs[3:5, :, :] = sim_obs[3:5, :, :] * cumtests
    return (sim_lat, sim_obs), last_values
コード例 #2
0
def main():
    """Train the POS models on preprocessed data and print the dev error rate.

    Preprocesses the POS and NPC training files, fits the emission, bigram
    and trigram transition models on the preprocessed POS data, then
    evaluates the existing prediction file '../data/POS/dev.p5.out' against
    '../data/POS/dev.out'.

    Output is written with single-argument ``print(...)`` calls so the
    function produces the same text under Python 2 and Python 3.
    """
    tool.preprocess('../data/POS/train', '../data/POS/ptrain')
    tool.preprocess('../data/NPC/train', '../data/NPC/ptrain')

    e0 = em.emission()
    bt0 = bitr.bi_transition()
    tt0 = tritr.tri_transition()

    print("with preprocessor")
    e0.compute('../data/POS/ptrain')
    bt0.compute('../data/POS/ptrain')
    tt0.compute('../data/POS/ptrain')

    # NOTE: dev.p5.out is not regenerated here. It was produced by the
    # (currently disabled) call
    #   viterbi_Nbest(e0, bt0, tt0, '../data/POS/dev.in', '../data/POS/dev.p5.out',
    #                 lambda0=1.0, lambda1=30.0, lambda2=1.0, best=1)
    # Recorded tuning results (presumably "lambda0 lambda1 lambda2: dev
    # error rate" - confirm against the experiment log):
    #   0.5 1.5 0: 0.2574
    #   1 10 1: 0.2422
    #   1 15 1: 0.2422
    #   1 20 1: 0.239
    #   1 25 1: 0.2369
    #   1 30 1: 0.235
    #   1 35 1: 0.2334
    # Earlier note: bigram viterbi_best "with new smoothing" gave 0.27637.
    for c in range(1, 2):  # only output column 1 is evaluated
        era, _ = tool.evaluate('../data/POS/dev.p5.out',
                               '../data/POS/dev.out',
                               col=c,
                               pr=True)
        print("%s :POS, DP2: %s" % (c, era))
コード例 #3
0
def main():
    """Fit the emission/bigram/trigram models and report the dev error rate.

    Steps performed:
      1. Preprocess the POS and NPC training files into their 'ptrain' files.
      2. Fit the emission, bigram-transition and trigram-transition models
         on the preprocessed POS training file.
      3. Evaluate the existing '../data/POS/dev.p5.out' predictions against
         the gold file '../data/POS/dev.out' and print the error rate.

    Printing uses single-argument ``print(...)`` with ``%`` formatting, which
    yields identical output on Python 2 and Python 3.
    """
    pos_ptrain = '../data/POS/ptrain'
    tool.preprocess('../data/POS/train', pos_ptrain)
    tool.preprocess('../data/NPC/train', '../data/NPC/ptrain')

    e0 = em.emission()
    bt0 = bitr.bi_transition()
    tt0 = tritr.tri_transition()

    print("with preprocessor")
    for model in (e0, bt0, tt0):
        model.compute(pos_ptrain)

    # NOTE: the prediction file evaluated below is expected to exist already;
    # the call that generates it is disabled:
    #   viterbi_Nbest(e0, bt0, tt0, '../data/POS/dev.in', '../data/POS/dev.p5.out',
    #                 lambda0=1.0, lambda1=30.0, lambda2=1.0, best=1)
    # Recorded tuning results (presumably "lambda0 lambda1 lambda2: dev
    # error rate" - confirm):
    #   0.5 1.5 0: 0.2574
    #   1 10 1: 0.2422
    #   1 15 1: 0.2422
    #   1 20 1: 0.239
    #   1 25 1: 0.2369
    #   1 30 1: 0.235
    #   1 35 1: 0.2334
    # Earlier note: bigram viterbi_best "with new smoothing" gave 0.27637.
    for c in range(1, 2):  # only output column 1 is evaluated
        era, _unused = tool.evaluate('../data/POS/dev.p5.out', '../data/POS/dev.out',
                                     col=c, pr=True)
        print("%s :POS, DP2: %s" % (c, era))
コード例 #4
0
def main():
    """Run the baseline experiment on raw (non-preprocessed) POS data.

    Preprocesses the POS and NPC training files, fits the emission and
    transition models on the raw POS training file, then prints the
    evaluation result of the emission-only prediction and of the
    `viterbi_best` decoding on the POS dev set.

    Printing uses single-argument ``print(...)`` so the output is the same
    under Python 2 and Python 3.
    """
    tool.preprocess('../data/POS/train', '../data/POS/ptrain')
    tool.preprocess('../data/NPC/train', '../data/NPC/ptrain')

    e0 = em.emission()
    t0 = tr.transition()
    print("without preprocessor")
    e0.compute('../data/POS/train')
    t0.compute('../data/POS/train')

    e0.predict('../data/POS/dev.in', '../data/POS/dev.p2.out', p=False)
    mle_result = tool.evaluate('../data/POS/dev.p2.out', '../data/POS/dev.out')
    # Wrap in a 1-tuple so a tuple result is printed whole instead of being
    # unpacked by the % operator.
    print("POS,MLE: %s" % (mle_result,))

    viterbi_best(e0, t0, '../data/POS/dev.in', '../data/POS/dev.p3.out', p=False)
    dp_result = tool.evaluate('../data/POS/dev.p3.out', '../data/POS/dev.out')
    print("POS,DP: %s" % (dp_result,))
コード例 #5
0
def main():
    """Evaluate emission-only and Viterbi tagging without preprocessing.

    The POS and NPC training files are still preprocessed (their 'ptrain'
    files are written), but both models are fitted on the raw POS training
    file. Two dev-set evaluations are printed: one for `e0.predict` and one
    for `viterbi_best`.

    Output uses single-argument ``print(...)`` with ``%`` formatting so it is
    identical on Python 2 and Python 3.
    """
    pos_train = '../data/POS/train'
    dev_in = '../data/POS/dev.in'
    dev_gold = '../data/POS/dev.out'

    tool.preprocess(pos_train, '../data/POS/ptrain')
    tool.preprocess('../data/NPC/train', '../data/NPC/ptrain')

    e0 = em.emission()
    t0 = tr.transition()
    print("without preprocessor")
    e0.compute(pos_train)
    t0.compute(pos_train)

    e0.predict(dev_in, '../data/POS/dev.p2.out', p=False)
    # The 1-tuple keeps a tuple-valued result from being unpacked by `%`.
    print("POS,MLE: %s" % (tool.evaluate('../data/POS/dev.p2.out', dev_gold),))

    viterbi_best(e0, t0, dev_in, '../data/POS/dev.p3.out', p=False)
    print("POS,DP: %s" % (tool.evaluate('../data/POS/dev.p3.out', dev_gold),))
コード例 #6
0
)
def _str2bool(value):
    """Convert a command-line string to a bool.

    ``type=bool`` must not be used with argparse: ``bool('False')`` is True
    because any non-empty string is truthy. Raising ValueError makes
    argparse report an invalid-value usage error.
    """
    text = str(value).strip().lower()
    if text in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    if text in ('0', 'false', 'f', 'no', 'n', 'off', ''):
        return False
    raise ValueError('expected a boolean, got %r' % (value,))


parser.add_argument('-b',
                    dest='best',
                    type=int,
                    choices=[1, 10],
                    default=1,
                    help='number of best tags to generate')
parser.add_argument('-p',
                    dest='process',
                    type=_str2bool,
                    default=True,
                    help='whether do process or not')
args = parser.parse_args()
print(args)
if args.algorithm == 0:
    # Emission-only tagging.
    e = em.emission()
    e.compute(args.trainfile)
    e.predict(args.infile, args.outfile, args.process)

elif args.algorithm == 1:
    if args.best != 1:
        # Report misuse through argparse: prints usage plus the message to
        # stderr and exits with a non-zero status (the old code exited 0).
        parser.error("best must be 1 with algorithm 1")
    # run original version of viterbi
    e = em.emission()
    e.compute(args.trainfile)
    t = tr.transition()
    t.compute(args.trainfile)
    viterbi.viterbi_best(e, t, args.infile, args.outfile, args.process)
コード例 #7
0
ファイル: run.py プロジェクト: BernardYuan/Twitter-POS-Tagger
import toolbox as tool
import viterbi as viterbi
import argparse

def _str2bool(value):
	"""Convert a command-line string to a bool.

	``type=bool`` must not be used with argparse: ``bool('False')`` is True
	because any non-empty string is truthy. Raising ValueError makes
	argparse report an invalid-value usage error.
	"""
	text = str(value).strip().lower()
	if text in ('1', 'true', 't', 'yes', 'y', 'on'):
		return True
	if text in ('0', 'false', 'f', 'no', 'n', 'off', ''):
		return False
	raise ValueError('expected a boolean, got %r' % (value,))


parser = argparse.ArgumentParser(description="twitter POS tagger implemented with hidden markov model")

parser.add_argument("-t", dest='trainfile', required=True, help='path of training file')
parser.add_argument("-i", dest='infile', required=True, help='path of input file')
parser.add_argument("-o", dest='outfile', required=True, help='path of output file')
parser.add_argument("--algorithm", dest="algorithm", type=int, choices=[0, 1, 2], required=True, help="0:MLE with emission probability\n1:viterbi_best,top 1 sequence\n2:viterbi_Nbest,top 1 or 10 best sequence")
parser.add_argument('-b', dest='best', type=int, choices=[1, 10], default=1, help='number of best tags to generate')
parser.add_argument('-p', dest='process', type=_str2bool, default=True, help='whether do process or not')
args = parser.parse_args()
print(args)
if args.algorithm == 0:
	# Emission-only tagging.
	# NOTE(review): `em` is not imported in the visible import block - confirm
	# it is brought into scope elsewhere.
	e = em.emission()
	e.compute(args.trainfile)
	e.predict(args.infile, args.outfile, args.process)

elif args.algorithm == 1:
	if args.best != 1:
		# Report misuse through argparse: prints usage plus the message to
		# stderr and exits with a non-zero status (the old code exited 0).
		parser.error("best must be 1 with algorithm 1")
	# run original version of viterbi
	# NOTE(review): `tr` is not imported in the visible import block either.
	e = em.emission()
	e.compute(args.trainfile)
	t = tr.transition()
	t.compute(args.trainfile)
	viterbi.viterbi_best(e, t, args.infile, args.outfile, args.process)