Exemple #1
0
        # pass transformed data to nb for prediction
        return self.style_nb_clf.predict(x_t)

# Script entry point: everything below runs only when this file is executed
# directly, not when it is imported as a module.
# NOTE(review): the bare `print` statements are Python 2 syntax; this block
# will not parse under Python 3.
if __name__ == "__main__":

    # Imports are kept local to script mode so importing this module does not
    # pull in the db layer or the sklearn evaluation helpers.
    from db.styles import Styles
    from db.basewordcts import BaseWordFreq
    # NOTE(review): `sklearn.cross_validation` was deprecated in scikit-learn
    # 0.18 and removed in 0.20; `train_test_split` now lives in
    # `sklearn.model_selection`. Left unchanged here; confirm the pinned
    # scikit-learn version before upgrading.
    from sklearn import cross_validation as c_v
    from sklearn.metrics import classification_report, confusion_matrix

    print 'Load baseline stop words'
    # presumably loads the baseline word frequencies used as stop words (per
    # the message above) -- verify against BaseWordFreq.load_all
    baseline = BaseWordFreq()
    baseline.load_all()

    print 'Get reviews by style'
    styles = Styles()

    # get top n styles by review count
    # presumably review_counts(10) returns a mapping keyed by style id for the
    # 10 most-reviewed styles -- TODO confirm against Styles.review_counts
    sty_ids = styles.review_counts(10).keys()
    # X is indexed by 'style_id', 'beer_id' and 'review' and exposes .index
    # and .values, so it looks like a pandas DataFrame -- TODO confirm.
    # presumably limit=0 means "no row limit" -- verify in beer_reviews_rollup
    X = styles.beer_reviews_rollup(sty_ids, limit=0)
    # `np` is not imported in this block; presumably numpy is imported at the
    # top of the file, outside this view.
    print 'Styles Retrieved: %s' % len(np.unique(X['style_id'].values))
    print 'Beers Retrieved : %s' % len(np.unique(X['beer_id'].values))
    print 'Total Rev Docs  : %s' % len(X.index)

    # Hold out 20% of the rows for testing: the 'review' column is the feature
    # input and 'style_id' is the label. random_state=0 fixes the shuffle so
    # the split is reproducible between runs.
    X_train, X_test, y_train, y_test = c_v.train_test_split(X['review'],
                                                            X['style_id'],
                                                            test_size=0.2,
                                                            random_state=0)

    clf = StyleTfidfNB(max_features=None,
                       ngram_range=(1, 2),
Exemple #2
0
 def __init__(self):
   """Create one instance of each collaborator and store it on the instance.

   Instantiates ``Locations``, ``Brewers``, ``Styles`` and ``Beers`` (each
   with no arguments, in that order) and binds them to ``self.region``,
   ``self.brewer``, ``self.style`` and ``self.beer`` respectively.

   NOTE(review): these four classes are defined outside this view;
   presumably they are data-access wrappers -- confirm before relying on
   construction order or side effects.
   """
   self.region = Locations()
   self.brewer = Brewers()
   self.style = Styles()
   self.beer = Beers()