# Example 1
def predict_models_header_sentiment(text_data_list):
    """Score news headers with the NLTK, word2vec and spaCy sentiment models.

    Args:
        text_data_list: iterable of (id, header, sub_header) rows.

    Returns:
        pandas.DataFrame with the input columns plus one classification
        column per model and the NLTK confidence score.
    """
    text_data_df = pd.DataFrame(text_data_list,
                                columns=['id', 'header', 'sub_header'])

    # Result columns; -1 / 0.0 mark rows not yet classified.
    text_data_df["sentiment_for"] = ""
    text_data_df["nltk_classify"] = -1
    text_data_df["nltk_confidence"] = 0.0
    text_data_df["word2vec_classify"] = -1
    text_data_df["count_vectorizer_classify"] = -1
    text_data_df["tfidf_vectorizer_classify"] = -1

    # ---------news nltk load-------------
    word_features = nltk_classifier.load_save_dataset('word_features.pickle')
    ensemble_clf = nltk_classifier.get_ensemble_models(None)

    # --------news word2vec load-------------
    model, w2vmodel, tfidf = word2vec_classifier_model.load_prediction_model_parameters()

    # --------spacy model load-------------
    pipe_count_vectorizer, pipe_tfidf_vectorizer = get_spacy_model()

    # BUG FIX: the original iterated with itertuples(index=False) yet wrote
    # results via `text_data_df.at[index, ...]` where `index` was never
    # defined (NameError). Keep the index so .at[] can target the row.
    for row in text_data_df.itertuples(index=True):
        index = row.Index
        text = row.header

        # ---------nltk-------------
        classify, nltk_confidence = nltk_classifier.sentiment_analyzer(
            text, ensemble_clf, word_features)

        # Map the string label to a numeric code:
        # pos -> 1, neg -> 0, anything else (e.g. neutral) -> 2.
        if classify == "pos":
            nltk_classify = 1
        elif classify == "neg":
            nltk_classify = 0
        else:
            nltk_classify = 2

        # BUG FIX: the original assigned "header" to the entire
        # `sentiment_for` column on every iteration; set only this row.
        text_data_df.at[index, 'sentiment_for'] = "header"
        text_data_df.at[index, 'nltk_classify'] = nltk_classify
        text_data_df.at[index, 'nltk_confidence'] = nltk_confidence

        # --------news word2vec-------------
        word2vec_classify = word2vec_classifier_model.predict(
            model, w2vmodel, tfidf, text)
        text_data_df.at[index, 'word2vec_classify'] = word2vec_classify

        # --------spacy------------------------
        # The pipelines expect an iterable of documents.
        text_list = [text]

        # predict using CountVectorizer
        count_vectorizer_classify = pipe_count_vectorizer.predict(text_list)
        # predict using TfidfVectorizer
        tfidf_vectorizer_classify = pipe_tfidf_vectorizer.predict(text_list)

        text_data_df.at[index, 'count_vectorizer_classify'] = int(count_vectorizer_classify[0])
        text_data_df.at[index, 'tfidf_vectorizer_classify'] = int(tfidf_vectorizer_classify[0])

    return text_data_df
# Example 2
                  "I feel amazing!",
                  "thanks for lyft credit i can not use because they do not offer wheelchair vans in pdx disapointed getthanked",
                "Intel surges 8% on an earnings beat and better-than-expected forecast"]

    print("\n--------nltk-------------")

    # print(pre_processing.common_pre_processing_steps(tweet))
    # print("***** loading word features *****")
    word_features = nltk_classifier.load_save_dataset('word_features.pickle')

    # print("***** save train models to a ensemble *****")
    ensemble_clf = nltk_classifier.get_ensemble_models(None)

    # print("***** predict tweets sentiment *****")
    for tokens in range(len(tweet_list)):
        classify, confidence = nltk_classifier.sentiment_analyzer(tweet_list[tokens], ensemble_clf, word_features)
        print("classify - {} , confidence - {}".format(classify, confidence))

    print("\n--------news_word2vec-------------")

    model, w2vmodel, tfidf = word2vec_classifier_model.load_prediction_model_parameters()

    for tokens in range(len(tweet_list)):
        prediction = word2vec_classifier_model.predict(model, w2vmodel, tfidf, tweet_list[tokens])
        print(" prediction - {}".format(prediction))

    # --------spacy model load-------------
    print("\n--------spacy-------------")
    pipe_count_vectorizer, pipe_tfidf_vectorizer = get_spacy_model()
    text_list = []
def predict_models_header_sentiment(text_data_list):
    """Classify the sentiment of news headers with three models.

    Each (id, header, sub_header) record's header text is scored by the
    NLTK ensemble and by the spaCy CountVectorizer / TfidfVectorizer
    pipelines; rows whose models raise are logged and skipped.

    Args:
        text_data_list: iterable of (id, header, sub_header) rows.

    Returns:
        pandas.DataFrame with one row per successfully classified header.
    """
    # BUG FIX: the original column list was missing the comma after
    # 'tfidf_vectorizer_classify', so Python's implicit string
    # concatenation fused the last two names into one bogus column.
    result_columns = [
        'id', 'header', 'sub_header', 'sentiment_for', 'nltk_classify',
        'nltk_confidence', 'count_vectorizer_classify',
        'count_vectorizer_confidence', 'tfidf_vectorizer_classify',
        'tfidf_vectorizer_confidence'
    ]

    text_data_df = pd.DataFrame(text_data_list,
                                columns=['id', 'header', 'sub_header'])

    # delete records with an empty or "NaN" header value
    text_data_df.drop(text_data_df[text_data_df['header'] == "NaN"].index,
                      inplace=True)
    text_data_df.drop(text_data_df[text_data_df['header'] == ""].index,
                      inplace=True)

    # NOTE: the original also pre-created empty result columns on
    # text_data_df; those writes were dead code (results are collected in
    # sentiment_data_list and text_data_df is never returned), so they
    # have been removed.

    # ---------news nltk load-------------
    word_features = nltk_classifier.load_save_dataset('word_features.pickle')
    ensemble_clf = nltk_classifier.get_ensemble_models()

    sentiment_data_list = []

    for row in text_data_df.itertuples(index=False):
        text = row.header

        # Defensive re-check in case a row slipped past the drops above.
        if len(text) > 0 and text != "NaN":

            try:
                # ---------nltk-------------
                classify, nltk_confidence = nltk_classifier.sentiment_analyzer(
                    text, ensemble_clf, word_features)

                # pos -> 1, neg -> 0, anything else -> 2
                if classify == "pos":
                    nltk_classify = 1
                elif classify == "neg":
                    nltk_classify = 0
                else:
                    nltk_classify = 2

                # --------spacy------------------------
                # predict using CountVectorizer
                classify_cVector, confidence_cVector = \
                    news_spacy_countVectorizer_model.sentiment_analyzer(text)

                # predict using TfidfVectorizer
                classify_tfidf, confidence_tfidf = \
                    news_spacy_tfidfVectorizer_model.sentiment_analyzer(text)

                sentiment_data_list.append({
                    'id': row.id,
                    'header': row.header,
                    'sub_header': row.sub_header,
                    'sentiment_for': "header",
                    'nltk_classify': nltk_classify,
                    'nltk_confidence': nltk_confidence,
                    'count_vectorizer_classify': int(classify_cVector),
                    'count_vectorizer_confidence': confidence_cVector,
                    'tfidf_vectorizer_classify': int(classify_tfidf),
                    'tfidf_vectorizer_confidence': confidence_tfidf
                })

            except Exception as error:
                # Best-effort: log the failure and continue with the
                # remaining rows.
                print(error)
                database_log.error_log(
                    "run_news_sentiment_analyzer - predict_models_header_sentiment",
                    error)

    # BUG FIX: DataFrame.append was deprecated and removed in pandas 2.x;
    # build the result frame directly from the collected rows instead.
    if sentiment_data_list:
        return pd.DataFrame(sentiment_data_list, columns=result_columns)
    return pd.DataFrame(columns=result_columns)