def NaiveBayesClassifierModel():
    """Train a Naive Bayes sentiment classifier and persist it to disk.

    Reads the sample training CSV, splits it into train/test partitions,
    builds an nltk feature set from the training tweets (stop words
    removed), trains nltk's NaiveBayesClassifier, and pickles the trained
    model to "NaiveBayesClassifierModel.pkl".
    """

    # Load the raw sample tweets and split them into train/test partitions.
    sampleTweets = mdiProjectReadSampleFile(
        mdiProjectReadFiles("mdiProjectFiles", "SampleTrainingData.csv"))
    trainingSet, testingSet = mdiProjectSplitTrainTest(sampleTweets)

    # Stop words are removed when the feature vocabulary is built.
    stopWords = mdiProjectStopWordList(
        mdiProjectReadFiles("mdiProjectFiles", "StopWords.txt"))

    # Per-tweet token lists plus the global feature vocabulary.
    tweets, featureList = mdiProjectGetFeatureListAndTweetListForTrainingSet(
        trainingSet, stopWords)

    # Map each tweet onto the feature dict shape nltk expects, then train.
    labelledFeatures = nltk.classify.util.apply_features(
        mdiProjectExtractFeatures, tweets)
    classifier = nltk.NaiveBayesClassifier.train(labelledFeatures)

    # Persist with protocol 2, matching what the rest of the file loads.
    with open("NaiveBayesClassifierModel.pkl", 'wb') as modelFile:
        pickle.dump(classifier, modelFile, protocol=2)
예제 #2
0
def sentimentsAnalysisSingleTweetUsingNaiveBayes():
    """Classify the sentiment of a single tweet posted from the UI form.

    Reads the "tweet" field from the submitted form, preprocesses it,
    extracts its feature words, classifies them with the pickled Naive
    Bayes model, and renders the result into naiveByesSingleTweet.html.
    """

    # BUG FIX: the trained classifier was never loaded in this function —
    # the name NaiveBayesClassifierModel referred to the training function
    # defined elsewhere in this file, so .classify() would fail at runtime.
    # Load the pickled model first, mirroring the other endpoints here.
    with open("NaiveBayesClassifierModel.pkl", "rb") as NaiveBayesClassifier:
        NaiveBayesClassifierModel = pickle.load(NaiveBayesClassifier)

    #Get the tweet from UI
    mdiProjectTweet = request.form.get("tweet")

    #Process the tweet
    porcessedTweet = mdiProjectProcessTweet(mdiProjectTweet)

    #Get all the stop words
    stopWordList = mdiProjectStopWordList(
        mdiProjectReadFiles("mdiProjectFiles", "StopWords.txt"))

    #Get the features vector for a single tweet
    featureVector = mdiProjectGetFeatureVectorForSingleTweet(
        porcessedTweet, stopWordList)

    #Get the feature words
    featureWords = mdiProjectExtractFeatures(featureVector)

    #Get sentiments based on the feature words
    tweetSentiment = NaiveBayesClassifierModel.classify(featureWords)

    return render_template(
        "naiveByesSingleTweet.html",
        prediction_text="Sentiment of the tweet is {}".format(tweetSentiment))
예제 #3
0
def mdiProjectProcessSampleFileToTrainModel():
    """Read the sample training CSV and return a cleaned DataFrame.

    Loads the sample tweets, drops the CSV header row, cleans the tweet
    text via mdiProjectCleanTweet, and returns a DataFrame with 'text'
    (cleaned tweets) and 'sentiments' (original labels) columns.
    """

    #Read sample tweets; materialize the reader and drop the header row.
    # Idiom fix: rows[1:] replaces the redundant rows[1:len(rows)] slice.
    rows = list(
        mdiProjectReadSampleFile(
            mdiProjectReadFiles("mdiProjectFiles", "SampleTrainingData.csv")))
    rows = rows[1:]

    #Create data frame out of the sample file tweets
    mdiProjectDataFrame = pd.DataFrame(rows, columns=['sentiments', 'text'])

    #Clean tweet list
    mdiProjectTweetTextListClean = mdiProjectCleanTweet(
        mdiProjectDataFrame['text'])

    # Rebuild the frame pairing cleaned text with its original sentiment.
    mdiProjectCleanedDataFrame = pd.DataFrame({
        'text': mdiProjectTweetTextListClean,
        'sentiments': mdiProjectDataFrame['sentiments']
    })

    return mdiProjectCleanedDataFrame
def naiveBayesClassifierTestModel():
    """Evaluate the pickled Naive Bayes model on the held-out test split.

    Returns a pair of parallel lists: (actual sentiments, predicted
    sentiments), one entry per tweet in the testing partition.
    """

    # Rebuild the same train/test split the training run used.
    sampleTweets = mdiProjectReadSampleFile(
        mdiProjectReadFiles("mdiProjectFiles", "SampleTrainingData.csv"))
    trainingSet, testingSet = mdiProjectSplitTrainTest(sampleTweets)
    stopWords = mdiProjectStopWordList(
        mdiProjectReadFiles("mdiProjectFiles", "StopWords.txt"))

    #Open NaiveBayesClassifier pickle file
    with open("NaiveBayesClassifierModel.pkl", "rb") as modelFile:
        classifier = pickle.load(modelFile)

    actualSentiments = []
    predictedSentiments = []

    for row in testingSet:
        # Row layout: index 0 is the label, index 1 is the tweet text.
        actualSentiments.append(row[0])

        # Preprocess -> feature vector -> feature dict -> classify.
        cleaned = mdiProjectProcessTweet(row[1])
        vector = mdiProjectGetFeatureVectorForSingleTweet(cleaned, stopWords)
        features = mdiProjectExtractFeatures(vector)
        predictedSentiments.append(classifier.classify(features))

    return actualSentiments, predictedSentiments
예제 #5
0
def sentimentsAnalysisSingleTweetUsingNaiveBayes():
    """ Predict sentiments of a tweet
    ---
    produces:
        - "application/xml"
        - "application/json"
    parameters:
        - name: tweet
          in: query
          type: string
          required: true
    responses:
        content:
            application/json:
                schema:
                    type: object
    """
    # NOTE: the docstring above is a Swagger/flasgger spec read at runtime,
    # so it is kept verbatim.

    # Load the pickled Naive Bayes model from disk.
    with open("NaiveBayesClassifierModel.pkl", "rb") as modelFile:
        classifier = pickle.load(modelFile)

    # Stop words used to filter the tweet's feature vector.
    stopWords = mdiProjectStopWordList(
        mdiProjectReadFiles("mdiProjectFiles", "StopWords.txt"))

    # Pull the tweet from the query string and preprocess it.
    rawTweet = request.args.get("tweet")
    cleanedTweet = mdiProjectProcessTweet(rawTweet)

    # Preprocessed text -> feature vector -> feature dict -> sentiment.
    vector = mdiProjectGetFeatureVectorForSingleTweet(cleanedTweet, stopWords)
    features = mdiProjectExtractFeatures(vector)

    return classifier.classify(features)
예제 #6
0
def sentimentsAnalysisBasedOnTweetTopicUsingNaiveBayes():
    """ Predict sentiments of a tweet
    ---
    parameters:
        - name: tweetTopic
          in: query
          type: string
          required: true
    responses:
        content:
            application/json:
                schema:
                    type: object
    """
    # Flask endpoint: fetch tweets for a topic, classify each one with the
    # pickled Naive Bayes model, and return the results as a downloadable
    # zip containing an Excel sheet.

    #Open NaiveBayesClassifier picke file
    with open("NaiveBayesClassifierModel.pkl", "rb") as NaiveBayesClassifier:
        NaiveBayesClassifierModel = pickle.load(NaiveBayesClassifier)

    #Get the tweet topic
    mdiProjectTweetTopic = request.args.get("tweetTopic")

    #Get the tweets based on the tweet topic and save in file
    # Authenticates against the Twitter API using the project config file
    # and writes the fetched tweets to disk (presumably
    # "mdiProjectTweets.json", read back below — confirm in the helper).
    mdiProjectSearchTweetBasedOnSearchTerm(
        mdiProjectTweeterAuthHandler(
            mdiProjectGetNormalizedTweeterConfig(
                "mdiProjectFiles/tweeterConfig.json")), mdiProjectTweetTopic)

    #Process saves tweets json file
    # Returns the tweet texts as a list plus (judging by the DataFrame-style
    # column assignment below) a DataFrame-like object — TODO confirm.
    mdiProjectTweetTextList, mdiProjectTweetText = mdiProjectProcessTweetJsonFile(
        "mdiProjectTweets.json")

    #Get all the stop words
    stopWordList = mdiProjectStopWordList(
        mdiProjectReadFiles("mdiProjectFiles", "StopWords.txt"))

    # Predicted sentiment per tweet, in the same order as the tweet list.
    sentiments = []

    for mdiProjectTweet in mdiProjectTweetTextList:

        #Process the tweet
        porcessedTweet = mdiProjectProcessTweet(mdiProjectTweet)

        #Get the features vector for a single tweet
        featureVector = mdiProjectGetFeatureVectorForSingleTweet(
            porcessedTweet, stopWordList)

        #Get the feacture words
        featureWords = mdiProjectExtractFeatures(featureVector)

        #Get sentiments based on the feature words
        tweetSentiment = NaiveBayesClassifierModel.classify(featureWords)
        # Strip any double quotes embedded in the predicted label.
        tweetSentiment = tweetSentiment.replace("\"", "")

        sentiments.append(tweetSentiment)

    # Attach the predictions as a new column alongside the tweet text.
    mdiProjectTweetText["sentiments"] = sentiments

    #Makes an excel file with sentimets and make it downloadable
    # The workbook is written into an in-memory buffer, never to disk.
    # NOTE(review): ExcelWriter.save() and the to_excel(encoding=...)
    # argument are removed in pandas 2.x (use writer.close(), drop
    # encoding) — this code requires an older pandas; confirm the pin.
    mdiProjectOutput = BytesIO()
    mdiProjectOutputWriter = pd.ExcelWriter(mdiProjectOutput,
                                            engine="xlsxwriter")
    mdiProjectTweetText.to_excel(mdiProjectOutputWriter,
                                 sheet_name="twitter_sentiments",
                                 encoding="utf-8",
                                 index=False)
    mdiProjectOutputWriter.save()

    # Wrap the single xlsx into a zip archive, also built in memory.
    memory_file = BytesIO()
    with zipfile.ZipFile(memory_file, 'w') as zf:
        names = ['twitter_sentiments.xlsx']
        files = [mdiProjectOutput]
        for i in range(len(files)):
            # Give each archive entry the current timestamp and compress it.
            data = zipfile.ZipInfo(names[i])
            data.date_time = time.localtime(time.time())[:6]
            data.compress_type = zipfile.ZIP_DEFLATED
            zf.writestr(data, files[i].getvalue())
    memory_file.seek(0)
    # Send the zip as an attachment; the CORS header allows any origin to
    # download it.
    response = make_response(
        send_file(memory_file,
                  attachment_filename='twitter_sentiments.zip',
                  as_attachment=True))
    response.headers['Access-Control-Allow-Origin'] = '*'

    return response
예제 #7
0
"""
Created on Mon Oct 14 08:47:51 2019

@author: Santosh Sah
"""

from nltk.stem.wordnet import WordNetLemmatizer
import string
import re
import pandas as pd
from MDIProjectSentimentAnalysis import (mdiProjectReadFiles,
                                         mdiProjectReadSampleFile,
                                         mdiProjectStopWordList)

# Module-level resources shared by the tweet-cleaning helpers below.
# Stop words loaded once at import time from the project data directory.
mdiProjectStopWordsList = mdiProjectStopWordList(
    mdiProjectReadFiles("mdiProjectFiles", "StopWords.txt"))
# All ASCII punctuation characters (for stripping punctuation from tweets).
mdiProjectPunctuationList = set(string.punctuation)
# Single WordNet lemmatizer instance reused across all tweets.
mdiProjectLemmitization = WordNetLemmatizer()
# Accumulator for cleaned tweets. NOTE(review): module-level mutable state —
# repeated calls to the cleaning function will keep appending; verify intent.
mdiProjectCleanedTweetList = []


#Clean tweet list
def mdiProjectCleanTweet(mdiProjectTweetList):

    #Looping over list of tweets
    for i in range(len(mdiProjectTweetList)):

        mdiProjectTweet = re.sub('[^a-zA-Z]', ' ', mdiProjectTweetList[i])

        #Lowering the case of tweet
        mdiProjectTweet = mdiProjectTweet.lower()