def user_trigram_pp(users, V, prior, alpha_bi=1e-2, alpha_tri=0.01,
                    min_active_days=60):
    """Fit one trigram model per user and collect perplexity/prediction scores.

    Users whose ``getActiveDays()`` is below ``min_active_days`` are skipped.
    For every remaining user a corpus is built from ``user.tripList``, a
    ``lm.trigramModel`` is estimated on the training part, and the model is
    scored on the test part with ``perplexity_OD`` and ``prediction``. Each
    of those calls returns a pair; the two members are reported as "In" and
    "Out" in the printed summary.

    Progress is printed every 100 processed users, followed by the median of
    each collected score list.

    Args:
        users: Iterable of user objects exposing ``getActiveDays()`` and
            ``tripList``.
        V: Passed through to ``lm.trigramModel`` unchanged (presumably the
            vocabulary or its size -- confirm against ``lm``).
        prior: Passed through to ``lm.trigramModel`` as ``prior``.
        alpha_bi: Passed through to ``lm.trigramModel`` as ``alpha_bi``.
        alpha_tri: Passed through to ``lm.trigramModel`` as ``alpha_tri``.
        min_active_days: Minimum value of ``getActiveDays()`` a user needs
            in order to be included. Defaults to 60.

    Returns:
        Tuple ``(pp_in, pp_out, pred_in, pred_out)`` of lists holding one
        entry per processed user, in iteration order.
    """
    pp_in, pp_out = [], []
    pred_in, pred_out = [], []
    count = 0

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Estimating trigram models for each user...')
    for user in users:
        if user.getActiveDays() < min_active_days:
            continue

        # NOTE(review): the meaning of the literal 30 is defined by
        # build_corpus, which is not visible here -- confirm before changing.
        X_train, X_test = build_corpus(user.tripList, 30)
        trigram = lm.trigramModel(X_train, V, prior=prior,
                                  alpha_bi=alpha_bi, alpha_tri=alpha_tri)
        ppIn, ppOut = trigram.perplexity_OD(X_test)
        predIn, predOut = trigram.prediction(X_test)

        pp_in.append(ppIn)
        pp_out.append(ppOut)
        pred_in.append(predIn)
        pred_out.append(predOut)

        count += 1
        if count % 100 == 0:
            print(count)

    summary = (
        ('Median In Perplexity = {}', pp_in),
        ('Median Out Perplexity = {}', pp_out),
        ('Median In Prediction Accuracy = {}', pred_in),
        ('Median Out Prediction Accuracy = {}', pred_out),
    )
    for template, values in summary:
        # np.median([]) yields nan but also emits a RuntimeWarning; when no
        # user qualified, report nan directly so the output stays the same
        # without the warning.
        median = np.median(values) if values else float('nan')
        print(template.format(median))

    return pp_in, pp_out, pred_in, pred_out
def user_trigram(users, V, prior, alpha_bi=1e-2, alpha_tri=0.1,
                 min_active_days=60):
    """Fit one trigram model per user and collect its test perplexity.

    Users whose ``getActiveDays()`` is below ``min_active_days`` are skipped.
    For every remaining user a corpus is built from ``user.tripList``, a
    ``lm.trigramModel`` is estimated on the training part, and its
    ``perplexity`` on the test part is recorded.

    Progress is printed every 100 processed users, followed by the median of
    the collected perplexities.

    Args:
        users: Iterable of user objects exposing ``getActiveDays()`` and
            ``tripList``.
        V: Passed through to ``lm.trigramModel`` unchanged (presumably the
            vocabulary or its size -- confirm against ``lm``).
        prior: Passed through to ``lm.trigramModel`` as ``prior``.
        alpha_bi: Passed through to ``lm.trigramModel`` as ``alpha_bi``.
        alpha_tri: Passed through to ``lm.trigramModel`` as ``alpha_tri``.
        min_active_days: Minimum value of ``getActiveDays()`` a user needs
            in order to be included. Defaults to 60.

    Returns:
        List with one perplexity value per processed user, in iteration
        order.
    """
    pp = []
    count = 0

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Estimating trigram models for each user...')
    for user in users:
        if user.getActiveDays() < min_active_days:
            continue

        # NOTE(review): the meaning of the literal 30 is defined by
        # build_corpus, which is not visible here -- confirm before changing.
        X_train, X_test = build_corpus(user.tripList, 30)
        trigram = lm.trigramModel(X_train, V, prior=prior,
                                  alpha_bi=alpha_bi, alpha_tri=alpha_tri)
        pp.append(trigram.perplexity(X_test))

        count += 1
        if count % 100 == 0:
            print(count)

    # np.median([]) yields nan but also emits a RuntimeWarning; when no user
    # qualified, report nan directly so the output stays the same without
    # the warning.
    median = np.median(pp) if pp else float('nan')
    print('Median Perplexity = {}'.format(median))
    return pp