def user_bigram(users, V, prior=None, alpha=1e-2):
    """Fit one bigram model per sufficiently active user and score it.

    Users whose ``getActiveDays()`` is below 60 are skipped.  For every
    remaining user ``build_corpus(user.tripList, 30)`` supplies a
    (train, test) pair; an ``lm.bigramModel`` is fitted on the training part
    and its ``perplexity`` on the test part is recorded.  The median of the
    recorded values is printed before returning.

    Args:
        users: iterable of user objects exposing ``getActiveDays()`` and
            ``tripList``.
        V: forwarded unchanged as the second argument of ``lm.bigramModel``
            (presumably the vocabulary -- confirm against ``lm``).
        prior: forwarded as the ``prior`` keyword of ``lm.bigramModel``.
        alpha: forwarded as the ``alpha`` keyword of ``lm.bigramModel``
            (presumably a smoothing strength -- confirm against ``lm``).

    Returns:
        list holding one perplexity value per retained user, in the order
        the users were iterated.
    """
    print('Estimating bigram models for each user...')

    # Lazy on purpose: each user's corpus is built right before that user's
    # model is fitted, exactly as in a plain loop.
    splits = (
        build_corpus(person.tripList, 30)
        for person in users
        if not (person.getActiveDays() < 60)
    )
    scores = [
        lm.bigramModel(train, V, prior=prior, alpha=alpha).perplexity(held_out)
        for train, held_out in splits
    ]

    print('Median Perplexity = {}'.format(np.median(scores)))
    return scores
def user_bigram_pp(users, V, prior, alpha=1e-2):
    """Fit per-user bigram models and collect in/out perplexity and accuracy.

    Users whose ``getActiveDays()`` is below 60 are skipped.  For every
    remaining user a model is fitted on the training part returned by
    ``build_corpus(user.tripList, 30)``; on the test part the model's
    ``perplexity_OD`` and ``prediction`` methods are each expected to return
    an (in, out) pair.  The median of each of the four collected series is
    printed.

    Args:
        users: iterable of user objects exposing ``getActiveDays()`` and
            ``tripList``.
        V: forwarded unchanged as the second argument of ``lm.bigramModel``.
        prior: forwarded as the ``prior`` keyword of ``lm.bigramModel``.
        alpha: forwarded as the ``alpha`` keyword of ``lm.bigramModel``.

    Returns:
        tuple ``(pp_in, pp_out, pred_in, pred_out)`` of four lists, each with
        one entry per retained user in iteration order.
    """
    print('Estimating bigram models for each user...')

    rows = []
    for person in users:
        if person.getActiveDays() < 60:
            continue
        train, held_out = build_corpus(person.tripList, 30)
        model = lm.bigramModel(train, V, prior=prior, alpha=alpha)
        in_pp, out_pp = model.perplexity_OD(held_out)
        in_acc, out_acc = model.prediction(held_out)
        rows.append((in_pp, out_pp, in_acc, out_acc))

    # Split the per-user rows into four parallel lists (works when empty too).
    pp_in, pp_out, pred_in, pred_out = [
        [row[col] for row in rows] for col in range(4)
    ]

    summary = (
        ('Median In Perplexity = {}', pp_in),
        ('Median Out Perplexity = {}', pp_out),
        ('Median In Prediction Accuracy = {}', pred_in),
        ('Median Out Prediction Accuracy = {}', pred_out),
    )
    for template, series in summary:
        print(template.format(np.median(series)))

    return pp_in, pp_out, pred_in, pred_out
def popu_bigram(users, V, alpha):
    """Fit a single pooled bigram model and score it on each user's test set.

    Users whose ``getActiveDays()`` is below 60 are skipped.  The training
    parts returned by ``build_corpus(user.tripList, 30)`` are concatenated
    into one training set, on which one ``lm.bigramModel`` is fitted with
    ``lowthreshold=0``.  That model's ``perplexity`` is then evaluated on
    each retained user's test part separately.  The number of retained users,
    the size of the pooled training set and the median perplexity are
    printed.

    Args:
        users: iterable of user objects exposing ``getActiveDays()`` and
            ``tripList``.
        V: forwarded unchanged as the second argument of ``lm.bigramModel``.
        alpha: forwarded as the ``alpha`` keyword of ``lm.bigramModel``.

    Returns:
        list holding one perplexity value per retained user, in the order
        the users were iterated.
    """
    print('Estimating bigram model for the whole population...')

    pooled_train = []
    held_out_sets = []
    for person in users:
        if person.getActiveDays() < 60:
            continue
        train, held_out = build_corpus(person.tripList, 30)
        pooled_train.extend(train)
        held_out_sets.append(held_out)

    # One held-out set is stored per retained user, so its length is the
    # user count.
    print('Number of users = {}'.format(len(held_out_sets)))
    print('Number of user days in training set = {}'.format(len(pooled_train)))

    model = lm.bigramModel(pooled_train, V, alpha=alpha, lowthreshold=0)
    scores = [model.perplexity(held_out) for held_out in held_out_sets]

    print('Median Perplexity = {}'.format(np.median(scores)))
    return scores
def construct_priors(users, V):
    """Estimate pooled bigram parameters from all sufficiently active users.

    Users whose ``getActiveDays()`` is below 60 are skipped.  The training
    parts returned by ``build_corpus(user.tripList, 30)`` are concatenated
    and a single ``lm.bigramModel`` is fitted on them with ``alpha=1e-4`` and
    ``lowthreshold=0``.  The number of retained users and the size of the
    pooled training set are printed.

    The result is presumably meant to be passed as the ``prior`` argument of
    the per-user estimators -- confirm against the callers.

    Args:
        users: iterable of user objects exposing ``getActiveDays()`` and
            ``tripList``.
        V: forwarded unchanged as the second argument of ``lm.bigramModel``.

    Returns:
        tuple ``(p_in, p_out)`` as unpacked from ``bigram.get_params()``.
    """
    n_users = 0
    pooled_train = []
    for user in users:
        if user.getActiveDays() < 60:
            continue
        # Only the training part is pooled; the test part is not used here.
        X_train, _ = build_corpus(user.tripList, 30)
        pooled_train.extend(X_train)
        n_users += 1

    print('Number of users = {}'.format(n_users))
    print('Number of user days in training set = {}'.format(len(pooled_train)))

    bigram = lm.bigramModel(pooled_train, V, alpha=1e-4, lowthreshold=0)
    p_in, p_out = bigram.get_params()

    # NOTE: a disabled (string-quoted, never executed) CSV dump of p_in/p_out
    # to ../Data/prior_in.csv and ../Data/prior_out.csv used to sit here.  It
    # was removed as dead code; reintroduce it as a separate helper using
    # `with open(...)` if the export is needed again.
    return (p_in, p_out)