class Classifications():
    """Bundle of pre-trained pattern classifiers applied to a review text.

    The four classifiers are loaded once, when the class body is executed,
    from the ``classifiers`` directory next to this module. All members are
    static; the class is used as a namespace and is not meant to be
    instantiated.
    """

    # static variables
    _category_path = os.path.join(os.path.dirname(__file__), "classifiers/category.slp")
    _rating_path = os.path.join(os.path.dirname(__file__), "classifiers/rating.slp")
    _rating_nlp_path = os.path.join(os.path.dirname(__file__), "classifiers/rating_nlp.svm")
    _sentiment_path = os.path.join(os.path.dirname(__file__), "classifiers/sentiment.nb")

    _category = SLP.load(_category_path)
    _rating = SLP.load(_rating_path)
    _rating_nlp = SVM.load(_rating_nlp_path)
    _sentiment = NB.load(_sentiment_path)

    @staticmethod
    def selectWords(review):
        '''
        Select the lemmata of adjectives, nouns, verbs and exclamation marks
        from a review and return them as a dictionary of (word, count).
        '''
        # NOTE(review): indexing with [0] keeps only the first sentence of the
        # parse tree; later sentences of the review are ignored. Kept as-is
        # because the stored models may have been trained on the same
        # features - confirm before widening.
        sentence = parsetree(review, lemmata=True)[0]  # lemmatize the review
        # select adjectives (JJ), nouns (NN), verbs (VB) and exclamation marks
        lemmata = [
            w.lemma for w in sentence
            if w.tag.startswith(('JJ', 'NN', 'VB', '!'))
        ]
        return count(lemmata)  # a dictionary of (word, count)

    @staticmethod
    def classify(text):
        """Run every classifier on ``text`` and collect the predictions.

        Returns a dict with the keys ``text`` (the input, unchanged),
        ``rate``, ``category``, ``rate_nlp`` and ``positivity``.
        ``positivity`` is True when the most probable sentiment class is one
        of 'True', '3.0', '4.0' or '5.0', and False otherwise (including when
        the sentiment classifier returns no classes at all).
        """
        # Build each feature representation once and share it between the
        # classifiers that consume it.
        document = Document(text)
        words = Classifications.selectWords(text)

        predicted_category = Classifications._category.classify(document, discrete=True)
        predicted_rate = Classifications._rating.classify(document, discrete=True)
        predicted_rate_nlp = Classifications._rating_nlp.classify(words, discrete=True)
        predicted_sentiment_dict = Classifications._sentiment.classify(words, discrete=False)

        # Pick the class with the HIGHEST probability. The previous code
        # sorted in descending order and then read index [1], i.e. the
        # runner-up class, and raised IndexError when fewer than two classes
        # were returned.
        if predicted_sentiment_dict:
            top_class = max(predicted_sentiment_dict.items(),
                            key=operator.itemgetter(1))[0]
            predicted_sentiment = str(top_class) in ['True', '3.0', '4.0', '5.0']
        else:
            predicted_sentiment = False

        return {
            'text': text,
            'rate': predicted_rate,
            'category': predicted_category,
            'rate_nlp': predicted_rate_nlp,
            'positivity': predicted_sentiment
        }
"""
views imports app, auth, and models, but none of these import views
"""
from flask import render_template, redirect, request, url_for, jsonify
from flask.ext.classy import FlaskView
from app import app
from auth import auth
from models import User

# Classifier, CSV Loading
from pattern.vector import Document, NB
from pattern.db import Datasheet

# Load classifier (done once, when this module is imported)
nb = NB.load("project/data/amazonClassifier")


@app.route('/classify', methods=['POST'])
def classify_review():
    """Classify the review text posted in the ``text`` form field.

    Responds with JSON ``{"result": <classification>}``. When the ``text``
    field is absent from the form, responds with a JSON error and HTTP 400
    instead of failing on ``None.strip()``.
    """
    text = request.form.get('text')
    if text is None:
        return jsonify(error="missing required form field 'text'"), 400
    return jsonify(result=nb.classify(text.strip()))


class BaseView(FlaskView):
    '''Basic views, such as the home and about page.'''
    route_base = '/'

    def index(self):
        """Render the home page template."""
        return render_template('home.html')
class NBModel:
    """Naive Bayes rating model built on pattern's ``NB`` classifier.

    On construction the model tries to load a previously saved classifier;
    the training methods feed it rated reviews, and the classification
    methods apply it to cleaned-up text.
    """

    # Lazily-built cache of the English stop-word set, shared by all
    # instances so the corpus is read only once per process.
    _stop_words = None

    def __init__(self):
        """Load a saved classifier if one can be read, else start empty."""
        self.nb = NB()
        self.stats = Statistics()
        try:
            cwd = os.getcwd()
            print("dir: " + cwd)
            if cwd.endswith("tv_ratings_frontend"):
                print("Working in django")
                self.nb = self.nb.load("ratings_frontend/backend/pattern_ml/nb_training.p")
            else:
                print("Not working in django")
                self.nb = self.nb.load("./nb_training.p")
            # NOTE(review): the flag is True when an EXISTING model was
            # loaded and False when a new one is created, which reads as
            # inverted relative to its name. Left unchanged because callers
            # may depend on the current values - confirm.
            self.new_nb_model = True
            print("Using existing pickled model")
        except IOError:
            self.new_nb_model = False
            print("Creating new NB model")

    def _train_on(self, reviews, field):
        """Train on attribute ``field`` of every review that has a rating."""
        for review in reviews:
            if review.rating is not None:  # and review.rating < 10 and review.rating > 1:
                v = Document(getattr(review, field), type=int(review.rating), stopwords=True)
                self.nb.train(v)

    def nb_train_text(self, reviews):
        """Train on ``review.text`` of each rated review, then save the model."""
        self._train_on(reviews, "text")
        self.save_model()

    def nb_train_summary(self, reviews):
        """Train on ``review.summary`` of each rated review (does not save)."""
        self._train_on(reviews, "summary")

    def nb_train_all_text(self, review_set):
        """Train on every list of reviews in ``review_set``, then save."""
        for review_list in review_set:
            self.nb_train_text(review_list)
        # save_model is defined on this class, not on the classifier; the
        # previous self.nb.save_model() call targeted the wrong object.
        self.save_model()

    def save_model(self):
        """Write the classifier to ``./nb_training.p``."""
        self.nb.save('./nb_training.p')

    def nb_test_imdb(self, reviews):
        """Print the classifier's test result over the rated ``reviews``."""
        arr = [
            Document(self.review_to_words(review.text), type=int(review.rating), stopwords=True)
            for review in reviews
            if review.rating is not None
        ]
        # Parenthesised single-argument print behaves the same on Python 2
        # and Python 3.
        print(self.nb.test(arr, target=None))

    def nb_classify_tweets(self, tvshow, tweets):
        """Classify each tweet and return the statistics for ``tvshow``.

        Every tweet is cleaned with ``review_to_words`` and classified; the
        classifications are summed and handed, with the tweet count, to
        ``Statistics`` for printing and for the returned value.
        """
        ratingSum = 0
        tweet_docs = []
        for tweet in tweets:
            words = self.review_to_words(tweet)  # clean once, reuse below
            tweet_docs.append((self.nb.classify(Document(words)), words))
        for tweet in tweet_docs:
            ratingSum += tweet[0]
        self.nb_stats()
        Statistics().printStats(tvshow, ratingSum, len(tweet_docs))
        print(self.nb.distribution)
        return Statistics().get_stats(tvshow, ratingSum, len(tweet_docs))

    def nb_stats(self):
        """Print a summary of the classifier's attributes."""
        print('----------- Classifier stats -----------')
        # print("Features: ", self.nb.features)
        print("Classes: ", self.nb.classes)
        print("Skewness: ", self.nb.skewness)
        print("Distribution: ", self.nb.distribution)
        print("Majority: ", self.nb.majority)
        print("Minority: ", self.nb.minority)

    def review_to_words(self, raw_review):
        """Return ``raw_review`` reduced to lowercase, non-stop-word tokens.

        URLs are stripped, every non-letter character becomes a space, the
        text is lowercased and split on whitespace, English stop words are
        dropped, and the remaining words are joined with single spaces.
        """
        no_url = re.sub(
            r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+",
            "", raw_review)
        # Replace everything that is not a letter with a space
        letters_only = re.sub("[^a-zA-Z]", " ", no_url)
        # to lowercase
        words = letters_only.lower().split()
        # remove stop words - the, of, a ... (set is built on first use only)
        if NBModel._stop_words is None:
            NBModel._stop_words = frozenset(stopwords.words("english"))
        stops = NBModel._stop_words
        meaningful_words = [w for w in words if w not in stops]
        return " ".join(meaningful_words)
"""
views imports app, auth, and models, but none of these import views
"""
from flask import render_template, redirect, request, url_for, jsonify
from flask.ext.classy import FlaskView
from app import app
from auth import auth
from models import User

# Classifier, CSV Loading
from pattern.vector import Document, NB
from pattern.db import Datasheet

# Load classifier (done once, when this module is imported)
nb = NB.load("project/data/amazonClassifier")


@app.route('/classify', methods=['POST'])
def classify_review():
    """Classify the review text posted in the ``text`` form field.

    Responds with JSON ``{"result": <classification>}``. When the ``text``
    field is absent from the form, responds with a JSON error and HTTP 400
    instead of failing on ``None.strip()``.
    """
    text = request.form.get('text')
    if text is None:
        return jsonify(error="missing required form field 'text'"), 400
    return jsonify(result=nb.classify(text.strip()))


class BaseView(FlaskView):
    '''Basic views, such as the home and about page.'''
    route_base = '/'

    def index(self):
        """Render the home page template."""
        return render_template('home.html')


# Attach BaseView's routes to the application.
BaseView.register(app)