def _format_word(self, word):
    """Normalize a single word for display.

    Removes every run of characters that is not a word character, a
    digit, an apostrophe or whitespace, lemmatizes what remains
    (https://en.wikipedia.org/wiki/Lemma_(morphology)) and returns the
    lemma with ``capitalize()`` applied.
    """
    # Apostrophes are deliberately kept by the pattern; all other
    # punctuation is dropped before lemmatizing.
    cleaned = re.sub(r"[^\w\d'\s]+", '', word)
    return Word(cleaned).lemmatize().capitalize()
def predict():
    """Classify the submitted message and build a noun summary for it.

    Reads ``message`` from the POSTed form. A message of two characters
    or fewer is not classified: the template is rendered with the fixed
    prediction ``3`` and no summary.

    Longer messages are tokenized with ``gensim.utils.simple_preprocess``
    (``min_len=2``), tokens found in ``stop_words`` are dropped, the rest
    are lemmatized and joined into one string, which is vectorized by the
    model loaded from ``MODEL_tfidf`` and classified by the model loaded
    from ``MODEL_lr``.

    The summary is built from the words TextBlob tags as ``NN``:
    lemmatized, capitalized, de-duplicated, visited in random order, and
    restricted to entries longer than two characters. Each kept entry is
    preceded by a single space.

    Returns:
        The rendered ``home.html`` template, with ``prediction`` and, for
        classified messages, ``summary``. Nothing is returned for
        non-POST requests.
    """
    if request.method != 'POST':
        # NOTE(review): non-POST requests have never produced a response
        # here; presumably the route is registered for POST only --
        # confirm against the route declaration.
        return None

    message = request.form['message']
    if len(message) <= 2:
        # Too short to classify: report the fixed fallback prediction.
        return render_template('home.html', prediction=3)

    # One lemmatizer for the whole message instead of one per token.
    lemmatizer = WordNetLemmatizer()
    tokens = gensim.utils.simple_preprocess(message, min_len=2)
    review = ' '.join(
        lemmatizer.lemmatize(token)
        for token in tokens
        if token not in stop_words
    )

    # NOTE(review): both models are re-read from disk on every request.
    tfidf_model = joblib.load(MODEL_tfidf)
    features = tfidf_model.transform([review.strip()])
    lr_model = joblib.load(MODEL_lr)
    my_prediction = lr_model.predict(features)

    nouns = [
        word.lemmatize()
        for word, tag in TextBlob(message).tags
        if tag == 'NN'
    ]

    # De-duplicate on the capitalized form, i.e. the same form that is
    # stored, so a repeated noun is only listed once.
    display = []
    seen = set()
    for noun in random.sample(nouns, len(nouns)):
        label = noun.capitalize()
        if label not in seen:
            seen.add(label)
            display.append(label)

    # Entries of two characters or fewer are skipped; they must not
    # discard the entries already collected. The leading space before
    # each entry is kept for compatibility with the existing template.
    output = ''.join(' ' + label for label in display if len(label) > 2)
    return render_template(
        'home.html', prediction=my_prediction, summary=output
    )