# pass transformed data to nb for prediction return self.style_nb_clf.predict(x_t) if __name__ == "__main__": from db.styles import Styles from db.basewordcts import BaseWordFreq from sklearn import cross_validation as c_v from sklearn.metrics import classification_report, confusion_matrix print 'Load baseline stop words' baseline = BaseWordFreq() baseline.load_all() print 'Get reviews by style' styles = Styles() # get top n styles by review count sty_ids = styles.review_counts(10).keys() X = styles.beer_reviews_rollup(sty_ids, limit=0) print 'Styles Retrieved: %s' % len(np.unique(X['style_id'].values)) print 'Beers Retrieved : %s' % len(np.unique(X['beer_id'].values)) print 'Total Rev Docs : %s' % len(X.index) X_train, X_test, y_train, y_test = c_v.train_test_split(X['review'], X['style_id'], test_size=0.2, random_state=0) clf = StyleTfidfNB(max_features=None, ngram_range=(1, 2),
def __init__(self):
    """Create the four helper objects this instance works with.

    Each helper is built with no arguments and stored on its own
    attribute:

    * ``self.region`` -- a ``Locations`` instance
    * ``self.brewer`` -- a ``Brewers`` instance
    * ``self.style``  -- a ``Styles`` instance
    * ``self.beer``   -- a ``Beers`` instance

    NOTE(review): the four classes are defined outside this view;
    presumably they are data-access wrappers -- confirm against their
    definitions before relying on any construction side effects.
    """
    # Construction order is kept exactly as written (region, brewer,
    # style, beer) in case any constructor has side effects.
    self.region = Locations()
    self.brewer = Brewers()
    self.style = Styles()
    self.beer = Beers()