class PageData(object):
    """Turn positional query rows into lists of keyed dicts.

    Each public method fetches rows from one of the helper objects created
    in ``__init__`` and relabels selected row positions with field names.
    """

    def __init__(self):
        # Helper objects are defined outside this class; they are presumably
        # the data-access layer for each entity -- confirm against their
        # modules.
        self.region = Locations()
        self.brewer = Brewers()
        self.style = Styles()
        self.beer = Beers()

    def __to_dict(self, field_dict, records):
        """Relabel positional records as dictionaries.

        Args:
            field_dict: mapping of output key -> index (or key) used to
                subscript each record.
            records: iterable of subscriptable rows.

        Returns:
            A list with one dict per record, in the same order, holding
            ``record[position]`` under every key of ``field_dict``.
        """
        # ``items()`` exists on both Python 2 and Python 3, whereas
        # ``iteritems()`` raises AttributeError on Python 3.  Materialise
        # the pairs once so they are not re-fetched for every record.
        fields = list(field_dict.items())
        return [
            {key: record[position] for key, position in fields}
            for record in records
        ]

    def brewer_regions(self):
        """Return ``region_id``/``name`` dicts from ``region.top_beer_ct(10)``."""
        return self.__to_dict(
            {'region_id': 0, 'name': 1},
            self.region.top_beer_ct(10))

    def styles(self):
        """Return ``style_id``/``name`` dicts from ``style.top_reviewed_styles(10)``."""
        return self.__to_dict(
            {'style_id': 0, 'name': 1},
            self.style.top_reviewed_styles(10))

    def brewers_by_region(self, region_id):
        """Return ``brewer_id``/``name`` dicts for ``region_id``.

        Rows come from ``brewer.has_recommended_beers_by_loc`` called with
        ``order_by="name"``.
        """
        return self.__to_dict(
            {'brewer_id': 0, 'name': 1},
            self.brewer.has_recommended_beers_by_loc(
                region_id, order_by="name"))

    def beers_by_brewer(self, brewer_id):
        """Return ``beer_id``/``name`` dicts from ``beer.has_rec_by_brewer``.

        Note the fields are read from row positions 1 and 2, not 0 and 1.
        """
        return self.__to_dict(
            {'beer_id': 1, 'name': 2},
            self.beer.has_rec_by_brewer(brewer_id))

    def recommendations(self, beer_id, style_id):
        """Return recommendation dicts from ``beer.recommendations``.

        The helper is called with ``(beer_id, style_id, 10)``; row positions
        2 through 6 are exposed as ``brewer_id``, ``brewer_name``,
        ``beer_id``, ``name`` and ``score``.
        """
        return self.__to_dict(
            {
                'brewer_id': 2,
                'brewer_name': 3,
                'beer_id': 4,
                'name': 5,
                'score': 6,
            },
            self.beer.recommendations(beer_id, style_id, 10))

    def beer_meta(self, beer_id):
        """Return ``beer.meta_dict(beer_id)`` wrapped in a one-element list."""
        return [self.beer.meta_dict(beer_id)]
def __init__(self):
    """Create one Locations, Brewers, Styles and Beers object per instance.

    The objects are stored as ``self.region``, ``self.brewer``,
    ``self.style`` and ``self.beer`` respectively.
    """
    # NOTE(review): these four classes are defined outside this view;
    # presumably they wrap database access -- confirm before relying on
    # construction order or cost.
    self.region = Locations()
    self.brewer = Brewers()
    self.style = Styles()
    self.beer = Beers()
# pass transformed data to nb for prediction return self.style_nb_clf.predict(x_t) if __name__ == "__main__": from db.styles import Styles from db.basewordcts import BaseWordFreq from sklearn import cross_validation as c_v from sklearn.metrics import classification_report, confusion_matrix print 'Load baseline stop words' baseline = BaseWordFreq() baseline.load_all() print 'Get reviews by style' styles = Styles() # get top n styles by review count sty_ids = styles.review_counts(10).keys() X = styles.beer_reviews_rollup(sty_ids, limit=0) print 'Styles Retrieved: %s' % len(np.unique(X['style_id'].values)) print 'Beers Retrieved : %s' % len(np.unique(X['beer_id'].values)) print 'Total Rev Docs : %s' % len(X.index) X_train, X_test, y_train, y_test = c_v.train_test_split(X['review'], X['style_id'], test_size=0.2, random_state=0) clf = StyleTfidfNB(max_features=None, ngram_range=(1, 2),
return self.style_nb_clf.predict(x_t) if __name__ == "__main__": from db.styles import Styles from db.basewordcts import BaseWordFreq from sklearn import cross_validation as c_v from sklearn.metrics import classification_report, confusion_matrix print 'Load baseline stop words' baseline = BaseWordFreq() baseline.load_all() print 'Get reviews by style' styles = Styles() # get top n styles by review count sty_ids = styles.review_counts(10).keys() X = styles.beer_reviews_rollup(sty_ids, limit=0) print 'Styles Retrieved: %s' % len(np.unique(X['style_id'].values)) print 'Beers Retrieved : %s' % len(np.unique(X['beer_id'].values)) print 'Total Rev Docs : %s' % len(X.index) X_train, X_test, y_train, y_test = c_v.train_test_split( X['review'], X['style_id'], test_size=0.2, random_state=0) clf = StyleTfidfNB( max_features=None, ngram_range=(1,2), min_df=0.05,