def create_edges(conversations, characters):
    edges = {}
    sid = SentimentIntensityAnalyzer()
    for character in characters:
        char_conversations = SentimentAnalysis.get_all_character_conversations(
            character=character, conversations=conversations)
        for name, conversation in char_conversations.items():
            splitted_conversation = conversation.split('@')
            avg_score, label = SentimentAnalysis.classify_conversation(
                splitted_conversation, analyzer=sid)
            if '-' in name:
                first_character, second_character = name.split('-')
                if first_character in characters and second_character in characters:
                    edges[name] = {}
                    edges[name]['from'] = first_character
                    edges[name]['to'] = second_character
                    edges[name]['weight'] = format(avg_score, '.2f')
                    edges[name]['conv_length'] = len(splitted_conversation)
                    if label == 'Positive':
                        edges[name]['color'] = 'g'
                    elif label == 'Negative':
                        edges[name]['color'] = 'r'
                    else:
                        edges[name]['color'] = 'b'
    return edges
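The edges dict above maps 'A-B' keys to from/to/weight/color records. As a companion, here is a minimal sketch (not part of the original repository; networkx and matplotlib are assumed) of how such a dict could be rendered as a sentiment graph:

# Hypothetical helper: draw the dict returned by create_edges() with networkx.
# Assumes each record carries 'from', 'to', 'weight', and 'color' as built above.
import networkx as nx
import matplotlib.pyplot as plt

def draw_sentiment_graph(edges):
    graph = nx.Graph()
    for edge in edges.values():
        # weight was stored as a '.2f' string, so convert it back to float
        graph.add_edge(edge['from'], edge['to'],
                       weight=float(edge['weight']), color=edge['color'])
    edge_colors = [d['color'] for _, _, d in graph.edges(data=True)]
    nx.draw_networkx(graph, edge_color=edge_colors)
    plt.show()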
Example #2
def printQualifyPoliticians(results, sentilex):
    """Print the title of each news item and qualify the persons mentioned in it."""
    print("========== PRINTING ALL PERSONS ========")
    for r in results:
        print("================ News ==============")
        print(">>TITLE")
        print(r['title'])
        text = r['content']
        print(">>QUALIFY")
        SentimentAnalysis.personSentimenti(SentimentAnalysis.createPOSdict(text), sentilex)
Example #3
def upload_route_summary():
    if request.method == 'POST':
        f = request.files['fileupload']
        print(f)
        THIS_FOLDER = os.path.dirname(os.path.abspath(__file__))
        filename_path = os.path.join(THIS_FOLDER, f.filename)
        SentimentAnalysis.uploaded_file(filename_path)
        data = {'username': "******"}
        return render_template('excel.html', data=data)
Example #4
def printQualify(results, extractsentilex):
    """Debug dump: print every item in a results object."""
    print("========== PRINTING ALL PERSONS ========")
    for r in results:
        print("================ News ==============")
        print(">>TITLE")
        print(r['title'])
        text = r['content']
        print(">>QUALIFY")
        SentimentAnalysis.qualifyNew(token.filterStopsSet(text), extractsentilex)
Example #5
def keyword():
    data = {'username': "******"}
    if request.method == "POST":
        global SIZE
        print(SIZE)
        if len(request.form['fetch_tweet']) == 0:
            return render_template('keyword.html', data=data)
        SentimentAnalysis.fetch_tweets(request.form['fetch_tweet'], SIZE)
        return render_template('keyword.html', data=data)
    else:
        return render_template("keyword.html")
Example #6
def changeUser():
    global currentUser, currentSentimentScore
    currentUser = Recommendation.getRandomUserID()
    # reset the sentiment score
    SentimentAnalysis.reset()
    currentSentimentScore = SentimentAnalysis.initSentimentScore()

    trackObjs = getUserRecommendation()
    htmlSnippet = render_template('recommendation.html', trackObjs=trackObjs)

    data = jsonify({'datax': htmlSnippet, 'currentUser': str(currentUser)})
    return data
Example #7
    def on_data(self, data):
        try:
            x = json.loads(data)

            # Setting up DB Connection
            if dbname in couchserver:
                db = couchserver[dbname]
            else:
                db = couchserver.create(dbname)

            # Setting Unique key
            x['_id'] = str(x['id_str'])

            # Sentiment Analysis Module
            sentiment = SentimentAnalysis.sentiment_score(str(x['text']))
            # print(x['text'])
            if 'extended_tweet": {' in str(data):
                sentiment = SentimentAnalysis.sentiment_score(
                    str(x['extended_tweet']['full_text']))
            # print(sentiment)
            x['sentiment'] = sentiment

            # Geo-analysis module
            area_details = [None, None]

            if (x['coordinates'] is None and x['place'] is not None
                    and x['place']['bounding_box'] is not None):
                box_coordinate_list = x['place']['bounding_box']['coordinates']
                area_details = GeoProcessor.find_bounding_box_area(
                    dictionaries[0], dictionaries[1], box_coordinate_list)

            if x['coordinates'] is not None:
                coordinates = Point(x['coordinates']['coordinates'])
                area_details = GeoProcessor.find_point_area(
                    dictionaries[0], dictionaries[1], coordinates)

            if area_details[0] is not None and area_details[1] is not None:
                x['SA3_Code'] = area_details[0]
                x['SA3_Name'] = area_details[1]

            # Saving changes to couchdb
            db.save(x)
            return True
        except tweepy.RateLimitError:
            time.sleep(15 * 60)
            logging.debug('Rate limit exceeded at %s', time.time())
        except BaseException as e:
            logging.debug('Error on_data %s at %s', e, time.time())
        return True
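For context, a hedged sketch of how a listener defining this on_data() is typically attached to a live stream under tweepy 3.x (the class name and credentials below are placeholders, not from the original code):

import tweepy

consumer_key = "..."          # placeholder credentials
consumer_secret = "..."
access_token = "..."
access_token_secret = "..."

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

listener = MyStreamListener()  # hypothetical class defining the on_data() above
stream = tweepy.Stream(auth=auth, listener=listener)
stream.filter(track=['melbourne'], is_async=True)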
Example #8
def emotionDetection():
    global vs, outputFrame, lock
    while True:
        frame = vs.read()

        #frame , objectSet = ObjectDetection.objectDetection(frame)
        frame, emotionList = FaceDetection.getFaceROI(frame)
        SentimentAnalysis.storeSentimentScore(emotionList)

        frame = imutils.resize(frame, width=700)

        # acquire the lock, set the output frame, and release the
        # lock
        with lock:
            outputFrame = frame.copy()
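The loop above only updates outputFrame; in the common imutils/Flask streaming pattern it is paired with an MJPEG route. A sketch under that assumption (generate(), video_feed(), and app are not part of the original excerpt):

import cv2
from flask import Response

def generate():
    # encode the latest frame as JPEG and yield it as one multipart chunk
    global outputFrame, lock
    while True:
        with lock:
            if outputFrame is None:
                continue
            ok, encoded = cv2.imencode('.jpg', outputFrame)
        if not ok:
            continue
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + bytearray(encoded) + b'\r\n')

@app.route('/video_feed')  # assumes a Flask app object named `app`
def video_feed():
    return Response(generate(),
                    mimetype='multipart/x-mixed-replace; boundary=frame')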
Example #9
def displayAssignmentData(twitter_data_set, row_amount):
    """
    displayAssignmentData() uses the data_set from generateDataSet() to create a data frame which can then be
    formatted and sorted. The data frame is then analysed for sentiment and the results are displayed. Finally, the data
    frame is used with tabulate to format and display an orderly table.
    """
    # Instantiate the display-format helper.
    data_display_format = DataFrameDisplayFormat.DataFrameDisplayFormat()

    # Convert the data set to a data frame.
    twitter_data_frame = data_display_format.convertDataSetToDataFrame(twitter_data_set)

    # Format the frame, sorting columns and rows.
    twitter_data_frame = data_display_format.formatDataFrame(twitter_data_frame)

    # Instantiate a SentimentAnalysis object and display the results.
    sentiment_analysis = SentimentAnalysis.SentimentAnalysis()
    sentiment_analysis.displaySentimentPercentages(twitter_data_frame)

    show_index = True  # Show index numbers.
    # Display the formatted data frame, limited to row_amount rows.
    data_display_format.displayDataFrame(twitter_data_frame, row_amount, show_index)
Example #10
def emotionDetect(filePath, outPath):
    sa = localsa.SentimentAnalysis(DICT_PATH)
    lineNum = 0
    with open(filePath, 'rb') as f:
        for line in f.readlines():
            lineNum += 1
            if lineNum % 10000 == 0:
                print("LINE=" + str(lineNum), file=sys.stderr)
            try:
                sentence = line.decode('utf-8').strip()
            except UnicodeDecodeError:
                print('[ERROR] line:' + str(lineNum), file=sys.stderr)
                continue

            sentenceTmp = sentence
            sentence = re.sub(r'\W', ' ', sentence).strip()  # strip non-word characters
            if not localbt.isValidSent(sentence):
                print('[Invalid]' + sentenceTmp)
                continue

            sentenceTmp = re.sub(r'[\[\]]', '"', sentenceTmp)
            with open(outPath, 'a') as fout:  # avoid shadowing the input handle f
                fout.write("[" + sentenceTmp + "]" + ",")

            segResult = list(jieba.cut(sentence))
            score = sa.sentimentScore(segResult)
            flag = "__POS__" if score[0] > score[1] else \
                   ("__NEG__" if score[0] < score[1] else "__EQU__")
            with open(outPath, 'a') as fout:
                fout.write('[' + str(score[0]) + ', ' + str(score[1]) + ", " +
                           flag + "]" + ",")
                result = ', '.join(getRidInSet(segResult, meaninglessSet))
                fout.write(result + '\n')
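A minimal usage sketch (both paths are placeholders, not from the original code):

if __name__ == '__main__':
    # score each line of a UTF-8 log, appending labelled results to the output
    emotionDetect('./barrage.log', './barrage_scored.txt')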
Example #11
def analyse(foo, text):
    print("RUNNING ANALYSIS")
    tweets = text.split("TEXT: ")
    #print(tweets)
    Results = SentimentAnalysis.AnalyseTweets(tweets)
    #print(Results)
    print("Analysis complete")
    retString = json.dumps(Results)
    return retString
Example #12
        def getKeyWordAndTrackInfo():
            for i in self.topHeadlines['articles']:
                keyWords.append(sa.key_phrases(i['title']))
            time.sleep(2)
            for j in keyWords:
                self.trackInfo.append(ss.getSong(j))

            self.displayTrackBtn.setEnabled(True)
            print(keyWords)
            print(self.trackInfo)
Example #13
def main():
    #charDict = ParsePlay.getAllTopChars(1) # Char Dict contains a bunch of chars w/ names as keys
    charDict = ParsePlay.parsePlay('./shaks200/dream.xml') # Char Dict contains a bunch of chars w/ names as keys

    charDictScaled = ScaleTime.rescaleTime(charDict)
    charScores = SentimentAnalysis.turn_lines_to_score(charDictScaled)
    charScoresInterpolated = Interpolate.interpolate_chars_uniformly(charScores, 100)
    charScoresFiltered = LowPassFilter.lowPassAllChars(charScoresInterpolated)
    pp = pprint.PrettyPrinter()
    pp.pprint(charScoresFiltered)
Example #14
def argue():
    query = [x for x in request.form.values()][0]
    print(query)
    global start, topic, links, articles, corpus, sent_tokens, word_tokens, models, tagged_sentences
    in_corpus, in_sent_tokens, in_word_tokens = Preprocessing.data_preprocessing(
        query)

    #Get sentences of same context as the input query.
    same_context = []
    for i in in_sent_tokens:
        for model in models:
            same_context += ContextAnalysis.get_similar_sentences(
                i, model, tagged_sentences, top_n=5)

    #Get the sentences of the same and reasonable polarity.
    sent_score = SentimentAnalysis.get_sentence_polarity(same_context)

    out = []
    for k in range(len(in_sent_tokens)):
        # call find_sentences once and keep both of its top matches
        matches = SentimentAnalysis.find_sentences(in_sent_tokens[k],
                                                   same_context,
                                                   sent_score,
                                                   similar=True,
                                                   top_n=10)
        out.append(matches[0])
        out.append(matches[1])
    #output processing
    out = list(set(out))

    print('Input:\n', query)
    print('Output:\n', '.'.join(out))

    print("Time taken Checkpoint-3:{} mins.".format(
        (time.time() - start) / 60))
    return render_template('index.html',
                           argument='What I have to say is....\n {}'.format(
                               '.'.join(out)))
Example #15
    def prediction(self):
        text = self.textEdit.toPlainText()
        pred = SentimentAnalysis.test(text)
        # print(pred)
        if pred == 'Negative':
            self.negCount += 1
        else:
            self.posCount += 1

        self.readWrite(self.negCount, self.posCount)
        self.saveReview(text, pred)
Example #16
def getRecommendation():
    '''
    Update currentSentimentScore to the estimated sentiment score,
    fetch track recommendations for it, then restart sentiment tracking.
    '''
    global currentSentimentScore

    # Get the current score and the tracks recommended for it
    currentSentimentScore = SentimentAnalysis.getSentimentScore()
    trackObjs = getUserRecommendation()

    # Reset the sentiment scores
    SentimentAnalysis.reset()
    currentSentimentScore = SentimentAnalysis.initSentimentScore()

    htmlSnippet = render_template('recommendation.html', trackObjs=trackObjs)
    data = jsonify({'datax': htmlSnippet})
    return data
Example #17
def analyze(sentence=None):
    retval = None
    if request.method == 'POST':
        sentence = request.form['sentence']
        if sentence is not None:
            retval = SentimentAnalysis.Analyse_Raw(sentence)
            if retval['compound'] >= 0:
                retval['result'] = 'Pos'
            else:
                retval['result'] = 'Neg'

    return jsonify(retval)
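A client-side sketch for the route above (host, port, and route path are assumptions; the handler reads a form field named 'sentence'):

import requests

resp = requests.post('http://localhost:5000/analyze',  # hypothetical URL
                     data={'sentence': 'The service was quick and friendly.'})
print(resp.json())  # VADER scores plus a 'result' of 'Pos' or 'Neg'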
Example #18
 def __init__(self, configFilepath=None):
     #configFilepath = r"C:\Users\ashvin\Desktop\UnderDevelopment\sentimentConfig.csv"
     df = pd.read_csv(configFilepath)
     df.set_index('Parameters', inplace=True)
     reviews = df.loc['paths', 'Value'].split(',')
     catNum = df.loc['numOfCat', 'Value']
     catTypes = df.loc['catType', 'Value'].split(',')
     sentimentLabel = df.loc['train', 'Value']
     sentiment = sa.SentimentTrain()
     acc, allReviews = sentiment.extract(reviews, catNum, catTypes,
                                         sentimentLabel)
     visuals = Visualisation(df=None, target=None)
     visuals.wordCloud(allReviews)
Example #19
def updateSystemDataset():
    mycompanies = dbmanager.getCompanies()
    for company in mycompanies:
        ticker = company[1]
        name = company[0]
        sentiment = analysis.pullTweets(name)
        dbmanager.insertSentimentResults(name, sentiment[0], sentiment[1],
                                         sentiment[2], sentiment[5],
                                         sentiment[4], sentiment[3])

        healthdata = health.extract_healthdata(ticker, '1')

        dbmanager.insertHealtResults(name, healthdata[0], healthdata[1],
                                     healthdata[2], healthdata[3], healthdata[4],
                                     healthdata[5], healthdata[6], healthdata[7],
                                     healthdata[8], healthdata[9], healthdata[10],
                                     healthdata[11], healthdata[12])
Example #20
def main():
    charDict = ParsePlay.getAllTopChars(5) # Char Dict contains a bunch of chars w/ names as keys
    charDictScaled = ScaleTime.rescaleTime(charDict)
    charScores = SentimentAnalysis.turn_lines_to_score(charDictScaled)
    charScoresInterpolated = Interpolate.interpolate_chars_uniformly(charScores, 100)
    charScoresFiltered = LowPassFilter.lowPassAllChars(charScoresInterpolated, window_ratio=.2)
    charScoresScaled = ScaleScores.scale_all_scores(charScoresFiltered)
    train, test = getTrainTestSplit(charScoresScaled, numTest=1)
    clusters = CharacterKMeans.characterKMeans(train, 5)
    pp = pprint.PrettyPrinter()
    chars = list(zip(*clusters))[1]  # zip() must be materialised before indexing in Python 3
    pp.pprint(chars)

    newWithPredicted = CharacterKMeans.predictCluster(test, list(zip(*clusters))[0], charScoresScaled)
    pp.pprint(newWithPredicted)
Example #21
 def __init__(self, df=None):
     #configFilepath = r"C:\Users\ashvin\Desktop\UnderDevelopment\sentimentConfig.csv"
     #df = pd.read_csv(configFilepath)
     #df.set_index('Parameters', inplace = True)
     print('creating Results directory--------> at {}'.format(os.getcwd()))
     try:
         os.makedirs('results')
     except OSError:
         pass  # directory already exists
     reviews = df.loc['paths', 'Value'].split(',')
     catNum = df.loc['numOfCat', 'Value']
     catTypes = df.loc['catType', 'Value'].split(',')
     sentimentLabel = df.loc['train', 'Value']
     sentiment = sa.SentimentTrain()
     acc, allReviews = sentiment.extract(reviews, catNum, catTypes,
                                         sentimentLabel)
     visuals = Visualisation(df=None, target=None)
     visuals.wordCloud(allReviews)
Example #22
def main():
    # Removing results file from last run
    del_last_results_file()
    # creating object of TwitterClient Class
    api = sa.TwitterClient()
    # calling function to get tweets
    print("Enter search term")
    tweets = api.get_tweets(query=sys.stdin.read(), count=1000)
    # saving data to excel file
    save_data("Training Set", tweets)
    # Preparing input list for classifier
    classifier_input_list = prepare_classifier_input(tweets)
    # Training classifier with already fetched tweets
    # train_classifier(classifier_input_list)
    # Fetch classifier test data from twitter
    print("Enter search term for feeding tweets to test classifier: ")
    test_tweets = api.get_raw_tweets(query=sys.stdin.read(), count=1000)
    # Testing classifier
    classifier_output_list = classify_data(test_tweets, classifier_input_list)
    save_data("Result Set", classifier_output_list)

    # picking positive tweets from tweets
    ptweets = [tweet for tweet in tweets if tweet['sentiment'] == 'positive']
    # percentage of positive tweets
    pos_tweet_percentage = 100 * len(ptweets) / len(tweets)
    print("Positive tweets percentage: {} %".format(pos_tweet_percentage))
    # picking negative tweets from tweets
    ntweets = [tweet for tweet in tweets if tweet['sentiment'] == 'negative']
    # percentage of negative tweets
    neg_tweet_percentage = 100 * len(ntweets) / len(tweets)
    print("Negative tweets percentage: {} %".format(neg_tweet_percentage))
    # percentage of neutral tweets
    print("Neutral tweets percentage: {} %".format(100 - pos_tweet_percentage -
                                                   neg_tweet_percentage))

    # printing first 10 positive tweets
    print("\nPositive tweets: ")
    for tweet in ptweets[:10]:
        print(tweet['text'])

    # printing first 10 negative tweets
    print("\n\nNegative tweets:")
    for tweet in ntweets[:10]:
        print(tweet['text'])
Example #23
def getCorrectSpelling():
    if request.method != 'POST':
        return json.dumps({
            "Status": "ERROR",
            "DATA": None,
            "Reason": "Only POST requests are accepted"
        })
    if request.headers['Content-Type'] != 'application/json':
        return json.dumps({
            "Status": "ERROR",
            "DATA": None,
            "Reason": "Only Content-Type: application/json is accepted"
        })
    if not request.is_json:
        return json.dumps({
            "Status": "ERROR",
            "DATA": None,
            "Reason": 'Expecting json data in the form {"data":"VALUE"}'
        })

    try:
        data = dict(request.json)
        sentence = data["sentence"]

        result = SentimentAnalysis.getResult(sentence)

        return json.dumps({
            "Status": "SUCCESS",
            "DATA": str(result),
            "Reason": ""
        })
    except Exception as e:
        return json.dumps({"Status": "ERROR", "DATA": None, "Reason": str(e)})
Example #24
class AnalysisOfSentiment:
    def __init__(self):
        self.obj = SentimentAnalysis()

    # def GetSentiment(self, text):
    #     train = self.obj.GetTrainDataSet()
    #     dict = self.obj.GetDictionaryOfTrainData(train)
    #     t = self.obj.GetSampleDataForTraining(train, dict)
    #     classifier = self.obj.TrainNaiveBayesClassifier(t)
    #     features = self.obj.GetDataFeatures(text, dict)
    #     result = self.obj.GetClassifiedResult(classifier, features)
    #     return result

    def GetVaderSentimentIntensity(self, text):
        result = self.obj.VaderSentimentIntensityAnalyzer(text)
        return result

    def GetTextBlobSentimentAnalyzer(self, text):
        result = self.obj.TextBlobSentimentAnalyzer(text)
        return result

    def GetAzureSentimentAnalyzer(self, text):
        result = self.obj.AzureSentimentAnalyzer(text)
        return result

    def GetStanfordCoreNLPSentimentAnalyzer(self, text):
        result = self.obj.StanfordCoreNLPSentimentAnalyzer(text)
        return result

    def GetGoogleSentimentAnalyzer(self, text):
        result_score, result_magnitude = self.obj.GoogleSentimentAnalyzer(text)
        return result_score, result_magnitude

    def GetIBMWatsonSentimentAnalyzer(self, text):
        result_score, result_label = self.obj.IBMWatsonSentimentAnalyzer(text)
        return result_score, result_label
Example #25
import os
import sys
import re

from BarrageTool import BarrageTool
from JiebaSegment import jiegSeg
import SentimentAnalysis as localsa

# filePath = 'temp'
# filePath = './highlightClips.log'
DICT_PATH = './dict/'
sa = localsa.SentimentAnalysis(DICT_PATH)


def sentEmotDetect(sentence):
    segResult = jiegSeg(sentence)
    score = sa.sentimentScore(segResult)
    return score


def processFile(localbt, filePath, logPath, subPath, inlPath):
    lineNum = 0
    maxLine = False
    # barrageReStr = r'[0-9\]: \/]+room\[[0-9]+\] uid\([0-9]+\).*\]: (.*)$'
    barrageReStr = r'[0-9\]: \/]+room\[[0-9]+\] uid\([0-9]+\) +event\[[\w ]*\].*\]: (.*)$'
    barrageRe = re.compile(barrageReStr)
    with open(filePath, 'rb') as f:
        for line in f.readlines():
            if maxLine and lineNum > maxLine:
                break
            lineNum += 1
Example #26
def getnews(symbol): 
	
	return SentimentAnalysis.get_google_news(symbol)
Example #27
cur = begin
all_topics = {}
cumulativeTopics = []
all_sentiment = {}
cumulativeSentiment = []

if stride is None:
    stride = len(input)

if stride < 1:
    stride = 1

# print("STRIDE:",stride)

#trains corpus
classifier, tagList = SentimentAnalysis.trainCorpus(corpus)

#parses input
for i in range((end - begin) // stride):
    stride_topics = {}  # per-window topic counts (assumed to reset here; not shown in the excerpt)
    for sentence in input[cur:cur + stride]:
        if "utterance" not in sentence:
            continue
        taggedSentences = contextsummary.posTag(sentence["utterance"])
        speaker = sentence["speaker"]

        #context
        topic = contextsummary.sentenctExtract(taggedSentences)
        for top in topic:
            stride_topics[top] = stride_topics.get(top, 0) + 1
            all_topics[top] = all_topics.get(top, 0) + 1
Example #28
def index():
    return "<p>Hello World!</p>" + "<p>" + sa.sentiment(
        movie_reviews.raw('pos/cv008_29435.txt')) + "</p>"
Example #29
#Call function to obtain recent TweetIds from file
USER_LIST = get_recent_tweet_ids_from_file()

# Print each tweet in the stream to the screen
for user in USER_LIST:
    print(user.tsn + ", " + user.tid)
    #Fetch twitter data
    ITERATOR = TWITTER.statuses.user_timeline(screen_name=user.tsn,
                                              since_id=user.tid,
                                              trim_user="******",
                                              exclude_replies="true",
                                              tweet_mode="extended")
    #ITERATOR = TWITTER.statuses.user_timeline(screen_name=user.tsn,since_id = user.tid,exclude_replies="true")
    # Print list of tweets
    for tweet in ITERATOR:
        sentiment_analysis = SentimentAnalysis.get_sentiment_analysis(
            tweet['full_text'])
        # Twitter Python Tool wraps the data returned by Twitter
        # as a TwitterDictResponse object.
        # We convert it back to the JSON format to print/score
        print(json.dumps(tweet['id']))
        # print (json.dumps(tweet['user']['name']))
        # print (json.dumps(tweet['user']['screen_name']))
        print(json.dumps(tweet['created_at']))
        print(json.dumps(tweet['full_text']))
        print(sentiment_analysis)
        print('')

print('')
print("Tweet fetch complete.")
print('')
Example #30
def main():
    """
    This method runs topic modelling through LDA.
    Note: code adapted from Lecturer Jeffrey Chan

    :return:
    """
    # tweets json filename
    jsonFilename = "uberTweetsUS.json"

    # returns tweetTokens and tweetDates
    tweets = tp.getTweetDf(jsonFilename, type="topic", removeFreqWords=True)

    featureNum = 250  # number of features/words used to describe our documents
    wordNumToDisplay = 20  # number of words to display for each topic
    topicNum = 3  # number of topics to be created

    # Count Vectorizer
    tfVectorizer = CountVectorizer(max_df=0.95,
                                   min_df=10,
                                   max_features=featureNum,
                                   lowercase=False,
                                   stop_words=None)
    # Create a term document matrix
    tf = tfVectorizer.fit_transform(tweets["tweetTokens"])

    # Extract the names of the features - words
    tfFeatureNames = tfVectorizer.get_feature_names()

    # Set seed to allow reproducibility of results
    seed(7777)
    # Implement topic modeling using LDA
    ldaModel = LatentDirichletAllocation(n_components=topicNum,
                                         max_iter=10,
                                         learning_method='online').fit(tf)

    # Print out topics
    display_topics(ldaModel, tfFeatureNames, wordNumToDisplay)

    ### The following code was adapted from https://www.machinelearningplus.com/nlp/topic-modeling-python-sklearn-examples/
    # It assigns a dominant topic to each tweet based on the constructed topic
    # model and also computes the overall topic distribution.

    # Obtain ldaModel output
    lda_output = ldaModel.transform(tf)

    topicNames = ["Topic" + str(i)
                  for i in range(topicNum)]  # topic names e.g., Topic 0, 1, ..
    tweetNames = ["Tweet" + str(i) for i in range(len(tweets["tweetTokens"]))
                  ]  # tweet names e.g., Tweet 0, 1, ..

    # Make a pandas dataframe
    # this dataframe has assigned probabilities that a certain tweet is topic, 0, 1, or 2
    tweets_and_topics = pd.DataFrame(np.round(lda_output, 2),
                                     columns=topicNames,
                                     index=tweetNames)

    # Get dominant topic for each tweet
    # Return topic for a certain tweet if probability to a certain topic is the highest
    tweet_dominant_topic = np.argmax(tweets_and_topics.values, axis=1)
    tweets_and_topics["dominant_topic"] = tweet_dominant_topic

    # Print Overall Topic Distribution
    print("Topic Distribution")
    df_topic_distribution = tweets_and_topics["dominant_topic"].value_counts().reset_index(name="Num Documents")
    df_topic_distribution.columns = ["Topic Number", "Number of Tweets"]
    print(df_topic_distribution)

    # Display word cloud
    displayWordcloud(ldaModel, tfFeatureNames)

    # Apply sentiment analysis to each constructed topics
    # returns tweetTokens and tweetDates
    # do this again to obtain tweet tokens in a format ready for sentiment analysis
    tweets = tp.getTweetDf(jsonFilename, removeFreqWords=True)

    # get sentiments for each tweet
    vaderSentiments = sa.vaderSentimentAnalysis(tweets["tweetTokens"],
                                                printSentiment=False)

    # prepare data for plotting
    tweet_df = pd.DataFrame({
        "Sentiments": vaderSentiments,
        "Date": tweets["tweetDates"],
        "DominantTopic": tweet_dominant_topic
    })
    tweet_df["Sentiments"] = tweet_df["Sentiments"].apply(pd.to_numeric)

    # distribution of sentiments across Topics
    g = ggplot(aes(x='Sentiments'), data=tweet_df) + \
        geom_histogram() + \
        facet_wrap('DominantTopic', nrow=3) + \
        labs(x="Sentiment Score", y="Frequency")
    print(g)
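display_topics() is called above but not shown in the excerpt; a common minimal implementation (an assumption, in the spirit of the tutorial cited in the comments) prints the top-weighted words per topic:

def display_topics(model, featureNames, wordNumToDisplay):
    # model.components_ holds one word-weight vector per topic
    for topicIdx, topic in enumerate(model.components_):
        topWords = topic.argsort()[:-wordNumToDisplay - 1:-1]
        print("Topic %d:" % topicIdx,
              " ".join(featureNames[i] for i in topWords))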
Example #31
def getSentimentPositivityOf(entry):
    return SentimentAnalysis.sentiment_analysis(entry)
Example #32
def model():

    df = SA.run()
    sns.countplot(x='Decisions', data=df)

    X = []
    sentences = list(df['Carddata'])
    for sen in sentences:
        X.append(preprocess_text(sen))

    y = df['Decisions']
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.20,
                                                        random_state=42)
    tokenizer = Tokenizer(num_words=10000)
    tokenizer.fit_on_texts(X_train)

    X_train = tokenizer.texts_to_sequences(X_train)
    X_test = tokenizer.texts_to_sequences(X_test)
    vocab_size = len(tokenizer.word_index) + 1
    maxlen = 300

    X_train = pad_sequences(X_train, padding='post', maxlen=maxlen)
    X_test = pad_sequences(X_test, padding='post', maxlen=maxlen)

    embeddings_dictionary = {}
    with open('glove.42B.300d.txt', encoding="utf8") as glove_file:
        for line in glove_file:
            records = line.split()
            word = records[0]
            vector_dimensions = np.asarray(records[1:], dtype='float32')
            embeddings_dictionary[word] = vector_dimensions

    embedding_matrix = np.zeros((vocab_size, 300))
    for word, index in tokenizer.word_index.items():
        embedding_vector = embeddings_dictionary.get(word)
        if embedding_vector is not None:
            embedding_matrix[index] = embedding_vector

    model = Sequential()
    embedding_layer = Embedding(vocab_size,
                                300,
                                weights=[embedding_matrix],
                                input_length=maxlen,
                                trainable=False)
    model.add(embedding_layer)
    model.add(Flatten())
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['acc'])

    print(model.summary())
    history = model.fit(X_train,
                        y_train,
                        batch_size=128,
                        epochs=6,
                        verbose=1,
                        validation_split=0.2)
    score = model.evaluate(X_test, y_test, verbose=1)
    print("Score:", score[0])
    print("Test Accuracy:", score[1])
Example #33
def drawSentimentImage(pageSentimentDict, pagesDict, maxfreq):

    pageSentimentDict = SentimentAnalysis.scaling(pageSentimentDict)