コード例 #1
0
def user_fourgram_pp(users, V, prior, alpha_bi=1e-2, alpha_tri=0.1,
                     alpha_four=0.01, min_active_days=60):
    """Fit a fourgram model per user and collect held-out scores.

    Users whose ``getActiveDays()`` is below ``min_active_days`` are skipped.
    For each remaining user, ``build_corpus(user.tripList, 30)`` yields a
    train/test pair; an ``lm.fourgramModel`` is fitted on the training part
    and scored on the test part with ``perplexity_OD`` and ``prediction``.
    The median of every collected score list is printed at the end.

    Args:
        users: Iterable of user objects exposing ``getActiveDays()`` and
            ``tripList``.
        V: Passed through to ``lm.fourgramModel`` (presumably the
            vocabulary -- TODO confirm against ``lm``).
        prior: Passed through to ``lm.fourgramModel`` as ``prior``.
        alpha_bi: Passed through to ``lm.fourgramModel`` as ``alpha_bi``.
        alpha_tri: Passed through to ``lm.fourgramModel`` as ``alpha_tri``.
        alpha_four: Passed through to ``lm.fourgramModel`` as ``alpha_four``.
        min_active_days: Minimum ``getActiveDays()`` value a user needs in
            order to be evaluated. Defaults to 60.

    Returns:
        Tuple ``(pp_in, pp_out, pred_in, pred_out)`` of lists with one entry
        per evaluated user: the two values returned by ``perplexity_OD``
        followed by the two values returned by ``prediction``.
    """
    pp_in, pp_out = [], []
    pred_in, pred_out = [], []
    count = 0
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('Estimating fourgram models for each user...')
    for user in users:
        if user.getActiveDays() < min_active_days:
            continue
        # NOTE(review): the meaning of the literal 30 is defined by
        # build_corpus, which is not visible here -- confirm before changing.
        X_train, X_test = build_corpus(user.tripList, 30)
        fourgram = lm.fourgramModel(X_train, V, prior=prior,
                                    alpha_bi=alpha_bi, alpha_tri=alpha_tri,
                                    alpha_four=alpha_four)
        ppIn, ppOut = fourgram.perplexity_OD(X_test)
        predIn, predOut = fourgram.prediction(X_test)
        pp_in.append(ppIn)
        pp_out.append(ppOut)
        pred_in.append(predIn)
        pred_out.append(predOut)
        count += 1
        # Progress indicator: one line every 100 evaluated users.
        if count % 100 == 0:
            print(count)
    summary = (
        ('Median In Perplexity = {}', pp_in),
        ('Median Out Perplexity = {}', pp_out),
        ('Median In Prediction Accuracy = {}', pred_in),
        ('Median Out Prediction Accuracy = {}', pred_out),
    )
    for template, values in summary:
        # np.median([]) also yields nan but emits a RuntimeWarning; report
        # nan directly when no user qualified.
        median = np.median(values) if values else float('nan')
        print(template.format(median))
    return pp_in, pp_out, pred_in, pred_out
コード例 #2
0
def user_bigram(users, V, prior=None, alpha=1e-2, min_active_days=60):
    """Fit a bigram model per user and collect held-out perplexities.

    Users whose ``getActiveDays()`` is below ``min_active_days`` are skipped.
    For each remaining user, ``build_corpus(user.tripList, 30)`` yields a
    train/test pair; an ``lm.bigramModel`` is fitted on the training part and
    its ``perplexity`` on the test part is recorded. The median of the
    recorded values is printed at the end.

    Args:
        users: Iterable of user objects exposing ``getActiveDays()`` and
            ``tripList``.
        V: Passed through to ``lm.bigramModel`` (presumably the vocabulary
            -- TODO confirm against ``lm``).
        prior: Passed through to ``lm.bigramModel`` as ``prior``.
        alpha: Passed through to ``lm.bigramModel`` as ``alpha``.
        min_active_days: Minimum ``getActiveDays()`` value a user needs in
            order to be evaluated. Defaults to 60.

    Returns:
        List with one perplexity value per evaluated user.
    """
    perplexity = []
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('Estimating bigram models for each user...')
    for user in users:
        if user.getActiveDays() < min_active_days:
            continue
        # NOTE(review): the meaning of the literal 30 is defined by
        # build_corpus, which is not visible here -- confirm before changing.
        X_train, X_test = build_corpus(user.tripList, 30)
        bigram = lm.bigramModel(X_train, V, prior=prior, alpha=alpha)
        perplexity.append(bigram.perplexity(X_test))
    # np.median([]) also yields nan but emits a RuntimeWarning; report nan
    # directly when no user qualified.
    median = np.median(perplexity) if perplexity else float('nan')
    print('Median Perplexity = {}'.format(median))
    return perplexity
コード例 #3
0
def user_trigram(users, V, prior, alpha_bi=1e-2, alpha_tri=0.1,
                 min_active_days=60):
    """Fit a trigram model per user and collect held-out perplexities.

    Users whose ``getActiveDays()`` is below ``min_active_days`` are skipped.
    For each remaining user, ``build_corpus(user.tripList, 30)`` yields a
    train/test pair; an ``lm.trigramModel`` is fitted on the training part
    and its ``perplexity`` on the test part is recorded. The median of the
    recorded values is printed at the end.

    Args:
        users: Iterable of user objects exposing ``getActiveDays()`` and
            ``tripList``.
        V: Passed through to ``lm.trigramModel`` (presumably the vocabulary
            -- TODO confirm against ``lm``).
        prior: Passed through to ``lm.trigramModel`` as ``prior``.
        alpha_bi: Passed through to ``lm.trigramModel`` as ``alpha_bi``.
        alpha_tri: Passed through to ``lm.trigramModel`` as ``alpha_tri``.
        min_active_days: Minimum ``getActiveDays()`` value a user needs in
            order to be evaluated. Defaults to 60.

    Returns:
        List with one perplexity value per evaluated user.
    """
    pp = []
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('Estimating trigram models for each user...')
    for user in users:
        if user.getActiveDays() < min_active_days:
            continue
        # NOTE(review): the meaning of the literal 30 is defined by
        # build_corpus, which is not visible here -- confirm before changing.
        X_train, X_test = build_corpus(user.tripList, 30)
        trigram = lm.trigramModel(X_train, V, prior=prior,
                                  alpha_bi=alpha_bi, alpha_tri=alpha_tri)
        pp.append(trigram.perplexity(X_test))
        # Progress indicator: one line every 100 evaluated users. len(pp)
        # equals the number of users evaluated so far.
        if len(pp) % 100 == 0:
            print(len(pp))
    # np.median([]) also yields nan but emits a RuntimeWarning; report nan
    # directly when no user qualified.
    median = np.median(pp) if pp else float('nan')
    print('Median Perplexity = {}'.format(median))
    return pp
コード例 #4
0
def user_bigram_pp(users, V, prior, alpha=1e-2, min_active_days=60):
    """Fit a bigram model per user and collect held-out scores.

    Users whose ``getActiveDays()`` is below ``min_active_days`` are skipped.
    For each remaining user, ``build_corpus(user.tripList, 30)`` yields a
    train/test pair; an ``lm.bigramModel`` is fitted on the training part and
    scored on the test part with ``perplexity_OD`` and ``prediction``. The
    median of every collected score list is printed at the end.

    Args:
        users: Iterable of user objects exposing ``getActiveDays()`` and
            ``tripList``.
        V: Passed through to ``lm.bigramModel`` (presumably the vocabulary
            -- TODO confirm against ``lm``).
        prior: Passed through to ``lm.bigramModel`` as ``prior``.
        alpha: Passed through to ``lm.bigramModel`` as ``alpha``.
        min_active_days: Minimum ``getActiveDays()`` value a user needs in
            order to be evaluated. Defaults to 60.

    Returns:
        Tuple ``(pp_in, pp_out, pred_in, pred_out)`` of lists with one entry
        per evaluated user: the two values returned by ``perplexity_OD``
        followed by the two values returned by ``prediction``.
    """
    pp_in, pp_out = [], []
    pred_in, pred_out = [], []
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('Estimating bigram models for each user...')
    for user in users:
        if user.getActiveDays() < min_active_days:
            continue
        # NOTE(review): the meaning of the literal 30 is defined by
        # build_corpus, which is not visible here -- confirm before changing.
        X_train, X_test = build_corpus(user.tripList, 30)
        bigram = lm.bigramModel(X_train, V, prior=prior, alpha=alpha)
        ppIn, ppOut = bigram.perplexity_OD(X_test)
        predIn, predOut = bigram.prediction(X_test)
        pp_in.append(ppIn)
        pp_out.append(ppOut)
        pred_in.append(predIn)
        pred_out.append(predOut)
    summary = (
        ('Median In Perplexity = {}', pp_in),
        ('Median Out Perplexity = {}', pp_out),
        ('Median In Prediction Accuracy = {}', pred_in),
        ('Median Out Prediction Accuracy = {}', pred_out),
    )
    for template, values in summary:
        # np.median([]) also yields nan but emits a RuntimeWarning; report
        # nan directly when no user qualified.
        median = np.median(values) if values else float('nan')
        print(template.format(median))
    return pp_in, pp_out, pred_in, pred_out
コード例 #5
0
def popu_bigram(users, V, alpha, min_active_days=60):
    """Fit one bigram model on all users pooled and score it per user.

    Users whose ``getActiveDays()`` is below ``min_active_days`` are skipped.
    The training parts returned by ``build_corpus(user.tripList, 30)`` are
    concatenated into a single training set for one ``lm.bigramModel``; that
    model's ``perplexity`` is then computed separately on each user's test
    part. User count, training-set size and median perplexity are printed.

    Args:
        users: Iterable of user objects exposing ``getActiveDays()`` and
            ``tripList``.
        V: Passed through to ``lm.bigramModel`` (presumably the vocabulary
            -- TODO confirm against ``lm``).
        alpha: Passed through to ``lm.bigramModel`` as ``alpha``.
        min_active_days: Minimum ``getActiveDays()`` value a user needs in
            order to be included. Defaults to 60.

    Returns:
        List with one perplexity value per included user, in input order.
    """
    pooled_train = []
    test_sets = []
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('Estimating bigram model for the whole population...')
    for user in users:
        if user.getActiveDays() < min_active_days:
            continue
        # NOTE(review): the meaning of the literal 30 is defined by
        # build_corpus, which is not visible here -- confirm before changing.
        X_train, X_test = build_corpus(user.tripList, 30)
        pooled_train.extend(X_train)
        test_sets.append(X_test)
    # Exactly one test set is stored per included user, so its length is the
    # user count.
    print('Number of users = {}'.format(len(test_sets)))
    print('Number of user days in training set = {}'.format(len(pooled_train)))
    bigram = lm.bigramModel(pooled_train, V, alpha=alpha, lowthreshold=0)
    perplexity = [bigram.perplexity(X_test) for X_test in test_sets]
    # np.median([]) also yields nan but emits a RuntimeWarning; report nan
    # directly when no user qualified.
    median = np.median(perplexity) if perplexity else float('nan')
    print('Median Perplexity = {}'.format(median))
    return perplexity
コード例 #6
0
def construct_priors(users, V, min_active_days=60):
    """Estimate population-level bigram parameters to use as priors.

    Users whose ``getActiveDays()`` is below ``min_active_days`` are skipped.
    The training parts returned by ``build_corpus(user.tripList, 30)`` are
    concatenated and a single ``lm.bigramModel`` is fitted on them with
    ``alpha=1e-4`` and ``lowthreshold=0``. User count and training-set size
    are printed. The parameters are only returned, not written to disk.

    Args:
        users: Iterable of user objects exposing ``getActiveDays()`` and
            ``tripList``.
        V: Passed through to ``lm.bigramModel`` (presumably the vocabulary
            -- TODO confirm against ``lm``).
        min_active_days: Minimum ``getActiveDays()`` value a user needs in
            order to contribute to the priors. Defaults to 60.

    Returns:
        Tuple ``(p_in, p_out)`` holding the two values returned by the
        fitted model's ``get_params()``.
    """
    n_users = 0
    corpus = []
    for user in users:
        if user.getActiveDays() < min_active_days:
            continue
        # Only the training part feeds the priors; the test part is unused.
        # NOTE(review): the meaning of the literal 30 is defined by
        # build_corpus, which is not visible here -- confirm before changing.
        X_train, _ = build_corpus(user.tripList, 30)
        corpus.extend(X_train)
        n_users += 1
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('Number of users = {}'.format(n_users))
    print('Number of user days in training set = {}'.format(len(corpus)))
    bigram = lm.bigramModel(corpus, V, alpha=1e-4, lowthreshold=0)
    p_in, p_out = bigram.get_params()
    return (p_in, p_out)