예제 #1
0
    def _format_word(self, word):
        """
        Normalize a single word for display.

        Three steps are applied in order:

        1. Every run of characters that is not a word character, a digit,
           an apostrophe, or whitespace is deleted.
        2. The cleaned text is wrapped in ``Word`` and lemmatized
           (https://en.wikipedia.org/wiki/Lemma_(morphology)).
        3. The lemma's ``capitalize()`` result is returned.
        """
        # Apostrophes are deliberately kept by the character class.
        cleaned = re.sub(r"[^\w\d'\s]+", '', word)
        return Word(cleaned).lemmatize().capitalize()
예제 #2
0
def predict():
    """
    Classify the submitted message and render ``home.html`` with the result.

    The ``message`` form field of a POST request is read. If it is longer
    than two characters it is tokenized with ``simple_preprocess``, filtered
    against ``stop_words``, lemmatized, transformed by the model loaded from
    ``MODEL_tfidf`` and classified by the model loaded from ``MODEL_lr``.
    Shorter messages skip classification and get the fixed prediction ``3``
    (NOTE(review): the meaning of ``3`` is decided by the template; confirm).

    For classified messages a ``summary`` string is also passed to the
    template: the ``NN``-tagged words TextBlob finds in the message,
    lemmatized, capitalized, de-duplicated, shuffled into random order, and
    restricted to words longer than two characters.

    Returns:
        The rendered ``home.html`` template.
    """
    if request.method != 'POST':
        # Nothing was submitted, so there is no message to classify. Render
        # the bare page instead of falling through to code that needs one.
        return render_template('home.html')

    message = request.form['message']
    if len(message) <= 2:
        # Too short to classify: report the fixed fallback value.
        return render_template('home.html', prediction=3)

    # One lemmatizer instance is enough for every token of the message.
    lemmatizer = WordNetLemmatizer()
    tokens = gensim.utils.simple_preprocess(message, min_len=2)
    review = ' '.join(lemmatizer.lemmatize(token)
                      for token in tokens if token not in stop_words)

    tfidf_model = joblib.load(MODEL_tfidf)
    vect = tfidf_model.transform([review.strip()])
    lr_model = joblib.load(MODEL_lr)
    my_prediction = lr_model.predict(vect)

    blob = TextBlob(message)
    nouns = [word.lemmatize() for word, tag in blob.tags if tag == 'NN']

    # Capitalize BEFORE the membership test: ``display`` holds capitalized
    # words, so testing the raw lemma against it would never detect a
    # duplicate of a lowercase noun.
    display = []
    for item in random.sample(nouns, len(nouns)):
        capitalized = Word(item).capitalize()
        if capitalized not in display:
            display.append(capitalized)

    # Short words are skipped; they must not wipe the words collected so
    # far. Each kept word is prefixed with a single space, so a non-empty
    # summary starts with a space exactly as it did before.
    output = ''.join(' ' + word for word in display if len(word) > 2)

    return render_template('home.html',
                           prediction=my_prediction,
                           summary=output)