def create_edges(conversations, characters):
    edges = {}
    sid = SentimentIntensityAnalyzer()
    for character in characters:
        char_conversations = SentimentAnalysis.get_all_character_conversations(
            character=character, conversations=conversations)
        for name, conversation in char_conversations.items():
            splitted_conversation = conversation.split('@')
            avg_score, label = SentimentAnalysis.classify_conversation(
                splitted_conversation, analyzer=sid)
            if '-' in name:
                first_character, second_character = name.split('-')
                if first_character in characters and second_character in characters:
                    edges[name] = {}
                    edges[name]['from'] = first_character
                    edges[name]['to'] = second_character
                    edges[name]['weight'] = format(avg_score, '.2f')
                    edges[name]['conv_length'] = len(splitted_conversation)
                    if label == 'Positive':
                        edges[name]['color'] = 'g'
                    elif label == 'Negative':
                        edges[name]['color'] = 'r'
                    else:
                        edges[name]['color'] = 'b'
    return edges
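The edge dictionary returned above maps 'A-B' conversation keys to from/to/weight/color attributes, which drops straight into a graph library. A minimal sketch, assuming networkx is available; the helper below is illustrative and not part of the original module.

# Hypothetical consumer of create_edges(); networkx is an assumption, not shown in the snippet above.
import networkx as nx

def build_sentiment_graph(edges):
    """Turn the {'A-B': {'from', 'to', 'weight', 'color', ...}} dict into a graph."""
    G = nx.Graph()
    for attrs in edges.values():
        G.add_edge(attrs['from'], attrs['to'],
                   weight=float(attrs['weight']),  # stored as a '.2f' string above
                   color=attrs['color'],
                   conv_length=attrs['conv_length'])
    return G

# G = build_sentiment_graph(create_edges(conversations, characters))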
def printQualifyPolticians(results, sentilex):
    print("========== PRINTING ALL PERSONS ========")
    for r in results:
        print("================ News ==============")
        print(">>TITLE")
        print(r['title'])
        text = r['content']
        print(">>QUALIFY")
        SentimentAnalysis.personSentimenti(SentimentAnalysis.createPOSdict(text), sentilex)
def upload_route_summary():
    if request.method == 'POST':
        f = request.files['fileupload']
        print(f)
        THIS_FOLDER = os.path.dirname(os.path.abspath(__file__))
        filename_path = os.path.join(THIS_FOLDER, f.filename)
        SentimentAnalysis.uploaded_file(filename_path)
    data = {'username': "******"}
    return render_template('excel.html', data=data)
def printQualify(results, extractsentilex):
    """Dump: print all items in a results object."""
    print("========== PRINTING ALL PERSONS ========")
    for r in results:
        print("================ News ==============")
        print(">>TITLE")
        print(r['title'])
        text = r['content']
        print(">>QUALIFY")
        SentimentAnalysis.qualifyNew(token.filterStopsSet(text), extractsentilex)
def keyword():
    data = {'username': "******"}
    if request.method == "POST":
        global SIZE
        print(SIZE)
        if len(request.form['fetch_tweet']) == 0:
            return render_template('keyword.html', data=data)
        SentimentAnalysis.fetch_tweets(request.form['fetch_tweet'], SIZE)
        return render_template('keyword.html', data=data)
    else:
        return render_template("keyword.html")
def changeUser():
    global currentUser, currentSentimentScore
    currentUser = Recommendation.getRandomUserID()
    # reset the sentiment score
    SentimentAnalysis.reset()
    currentSentimentScore = SentimentAnalysis.initSentimentScore()
    trackObjs = getUserRecommendation()
    htmlSnippet = render_template('recommendation.html', trackObjs=trackObjs)
    data = jsonify({'datax': htmlSnippet, 'currentUser': str(currentUser)})
    return data
def on_data(self, data):
    try:
        x = json.loads(data)

        # Setting up DB connection
        if dbname in couchserver:
            db = couchserver[dbname]
        else:
            db = couchserver.create(dbname)

        # Setting unique key
        x['_id'] = str(x['id_str'])

        # Sentiment analysis module
        sentiment = SentimentAnalysis.sentiment_score(str(x['text']))
        # print(x['text'])
        if 'extended_tweet": {' in str(data):
            sentiment = SentimentAnalysis.sentiment_score(
                str(x['extended_tweet']['full_text']))
        # print(sentiment)
        x['sentiment'] = sentiment

        # Geo-analysis module
        area_details = [None, None]
        if x['coordinates'] is None and x['place']['bounding_box'] is not None:
            box_coordinate_list = x['place']['bounding_box']['coordinates']
            area_details = GeoProcessor.find_bounding_box_area(
                dictionaries[0], dictionaries[1], box_coordinate_list)
        if x['coordinates'] is not None:
            coordinates = Point(x['coordinates']['coordinates'])
            area_details = GeoProcessor.find_point_area(
                dictionaries[0], dictionaries[1], coordinates)
        if area_details[0] is not None and area_details[1] is not None:
            x['SA3_Code'] = area_details[0]
            x['SA3_Name'] = area_details[1]

        # Saving changes to CouchDB
        db.save(x)
        return True
    except tweepy.RateLimitError:
        time.sleep(15 * 60)
        logging.debug('Rate Limit Exceeded at %s', time.time())
    except BaseException as e:
        logging.debug('Error on_data %s at %s', e, time.time())
        return True
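Documents saved by on_data() can be read back with the same python-couchdb client that couchserver appears to come from. A minimal sketch under that assumption; the server URL is a placeholder and the database name mirrors the dbname variable used above.

# Hypothetical read-back of documents saved by on_data(); the URL is a placeholder.
import couchdb

server = couchdb.Server('http://localhost:5984/')
db = server['dbname']  # same name as the dbname variable used in on_data()

for doc_id in db:
    doc = db[doc_id]
    # Each document carries the fields added during streaming.
    print(doc.get('sentiment'), doc.get('SA3_Name'))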
def emotionDetection():
    global vs, outputFrame, lock
    while True:
        frame = vs.read()
        # frame, objectSet = ObjectDetection.objectDetection(frame)
        frame, emotionList = FaceDetection.getFaceROI(frame)
        SentimentAnalysis.storeSentimentScore(emotionList)
        frame = imutils.resize(frame, width=700)
        # acquire the lock, set the output frame, and release the lock
        with lock:
            outputFrame = frame.copy()
def displayAssignmentData(twitter_data_set, row_amount):
    """
    displayAssignmentData() uses the data set from generateDataSet() to create
    a data frame which can then be formatted and sorted. The data frame is then
    analysed for sentiment and the results are displayed. Finally, the data
    frame is used with tabulate to format and display an orderly table.
    """
    # Instantiate DataFrameDisplayFormat object.
    data_display_format = DataFrameDisplayFormat.DataFrameDisplayFormat()
    # Convert the data set to a data frame.
    twitter_data_frame = data_display_format.convertDataSetToDataFrame(
        twitter_data_set)
    # Format the frame, sorting columns and rows.
    twitter_data_frame = data_display_format.formatDataFrame(twitter_data_frame)
    # Instantiate SentimentAnalysis object.
    sentiment_analysis = SentimentAnalysis.SentimentAnalysis()
    # Display sentiment analysis results.
    sentiment_analysis.displaySentimentPercentages(twitter_data_frame)
    show_index = True  # Set to show index numbers.
    # Display formatted data frame using an amount of rows.
    data_display_format.displayDataFrame(twitter_data_frame, row_amount, show_index)
def emotionDetect(filePath, outPath):
    sa = localsa.SentimentAnalysis(DICT_PATH)
    lineNum = 0
    with open(filePath, 'rb') as f:
        for line in f.readlines():
            lineNum += 1
            if lineNum % 10000 == 0:
                print("LINE=" + str(lineNum), file=sys.stderr)
            try:
                sentence = line.decode('utf-8').strip()
            except:
                print('[ERROR] line:' + str(lineNum), file=sys.stderr)
                continue  # skip lines that cannot be decoded
            sentenceTmp = sentence
            sentence = re.sub(r'\W', ' ', sentence).strip()  # strip non-word characters
            if not localbt.isValidSent(sentence):
                print('[Invalid]' + sentenceTmp)
                continue
            sentenceTmp = re.sub(r'[\[\]]', '"', sentenceTmp)
            with open(outPath, 'a') as outFile:  # separate handle to avoid shadowing the input file
                outFile.write("[" + sentenceTmp + "]" + ",")
            segResult = list(jieba.cut(sentence))
            score = sa.sentimentScore(segResult)
            flag = "__POS__" if score[0] > score[1] else \
                ("__NEG__" if score[0] < score[1] else "__EQU__")
            with open(outPath, 'a') as outFile:
                outFile.write('[' + str(score[0]) + ', ' + str(score[1]) +
                              ", " + flag + "]" + ",")
                result = ', '.join(getRidInSet(segResult, meaninglessSet))
                outFile.write(result + '\n')
def analyse(foo, text):
    print("RUNNING ANALYSIS")
    tweets = text.split("TEXT: ")
    # print(tweets)
    Results = SentimentAnalysis.AnalyseTweets(tweets)
    # print(Results)
    print("Analysis complete")
    retString = json.dumps(Results)
    return retString
def getKeyWordAndTrackInfo(self):
    # Assumed to be a method (the body uses self); keyWords initialised locally.
    keyWords = []
    for i in self.topHeadlines['articles']:
        keyWords.append(sa.key_phrases(i['title']))
        time.sleep(2)
    for j in keyWords:
        self.trackInfo.append(ss.getSong(j))
    self.displayTrackBtn.setEnabled(True)
    print(keyWords)
    print(self.trackInfo)
def main():
    # charDict = ParsePlay.getAllTopChars(1)
    # charDict contains a bunch of chars w/ names as keys
    charDict = ParsePlay.parsePlay('./shaks200/dream.xml')
    charDictScaled = ScaleTime.rescaleTime(charDict)
    charScores = SentimentAnalysis.turn_lines_to_score(charDictScaled)
    charScoresInterpolated = Interpolate.interpolate_chars_uniformly(charScores, 100)
    charScoresFiltered = LowPassFilter.lowPassAllChars(charScoresInterpolated)
    pp = pprint.PrettyPrinter()
    pp.pprint(charScoresFiltered)
def argue():
    query = [x for x in request.form.values()][0]
    print(query)
    global start, topic, links, articles, corpus, sent_tokens, word_tokens, models, tagged_sentences
    in_corpus, in_sent_tokens, in_word_tokens = Preprocessing.data_preprocessing(query)

    # Get sentences of the same context as the input query.
    same_context = []
    for i in in_sent_tokens:
        for model in models:
            same_context += ContextAnalysis.get_similar_sentences(
                i, model, tagged_sentences, top_n=5)

    # Get the sentences of the same and reasonable polarity.
    sent_score = SentimentAnalysis.get_sentence_polarity(same_context)
    out = []
    for k in range(len(in_sent_tokens)):
        out.append(SentimentAnalysis.find_sentences(
            in_sent_tokens[k], same_context, sent_score, similar=True, top_n=10)[0])
        out.append(SentimentAnalysis.find_sentences(
            in_sent_tokens[k], same_context, sent_score, similar=True, top_n=10)[1])

    # Output processing
    out = list(set(out))
    print('Input:\n', query)
    print('Output:\n', '.'.join(out))
    print("Time taken Checkpoint-3:{} mins.".format((time.time() - start) / 60))
    return render_template('index.html',
                           argument='What I have to say is....\n {}'.format('.'.join(out)))
def prediction(self):
    text = self.textEdit.toPlainText()
    pred = SentimentAnalysis.test(text)
    # print(pred)
    if pred == 'Negative':
        self.negCount += 1
    else:
        self.posCount += 1
    self.readWrite(self.negCount, self.posCount)
    self.saveReview(text, pred)
def getRecommendation():
    '''
    Updates currentSentimentScore to the estimated sentiment score and
    restarts sentiment-score tracking.
    '''
    global currentSentimentScore
    # Get the current score and the tracks for that score
    currentSentimentScore = SentimentAnalysis.getSentimentScore()
    trackObjs = getUserRecommendation()
    # Reset the sentiment scores
    SentimentAnalysis.reset()
    currentSentimentScore = SentimentAnalysis.initSentimentScore()
    htmlSnippet = render_template('recommendation.html', trackObjs=trackObjs)
    data = jsonify({'datax': htmlSnippet})
    return data
def analyze(sentence=None):
    retval = None
    if request.method == 'POST':
        sentence = request.form['sentence']
    if sentence is not None:
        retval = SentimentAnalysis.Analyse_Raw(sentence)
        if retval['compound'] >= 0:
            retval['result'] = 'Pos'
        else:
            retval['result'] = 'Neg'
    return jsonify(retval)
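A form-encoded POST is enough to exercise the view above. A minimal client sketch, assuming the view is mounted at /analyze on a locally running Flask app; the URL is illustrative.

# Hypothetical client for the analyze() view; the endpoint URL is an assumption.
import requests

resp = requests.post('http://localhost:5000/analyze',
                     data={'sentence': 'This movie was surprisingly good.'})
result = resp.json()
# Expected shape per the view: VADER-style scores plus a 'result' of 'Pos' or 'Neg'.
print(result['compound'], result['result'])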
def __init__(self, configFilepath=None):
    # configFilepath = r"C:\Users\ashvin\Desktop\UnderDevelopment\sentimentConfig.csv"
    df = pd.read_csv(configFilepath)
    df.set_index('Parameters', inplace=True)
    reviews = df.loc['paths', 'Value'].split(',')
    catNum = df.loc['numOfCat', 'Value']
    catTypes = df.loc['catType', 'Value'].split(',')
    sentimentLabel = df.loc['train', 'Value']
    sentiment = sa.SentimentTrain()
    acc, allReviews = sentiment.extract(reviews, catNum, catTypes, sentimentLabel)
    visuals = Visualisation(df=None, target=None)
    visuals.wordCloud(allReviews)
def updateSystemDataset():
    mycompanies = dbmanager.getCompanies()
    for company in mycompanies:
        ticker = company[1]
        name = company[0]
        sentiment = analysis.pullTweets(name)
        dbmanager.insertSentimentResults(name, sentiment[0], sentiment[1],
                                         sentiment[2], sentiment[5],
                                         sentiment[4], sentiment[3])
        healthdata = health.extract_healthdata(ticker, '1')
        dbmanager.insertHealtResults(name, healthdata[0], healthdata[1],
                                     healthdata[2], healthdata[3], healthdata[4],
                                     healthdata[5], healthdata[6], healthdata[7],
                                     healthdata[8], healthdata[9], healthdata[10],
                                     healthdata[11], healthdata[12])
def main():
    # charDict contains a bunch of chars w/ names as keys
    charDict = ParsePlay.getAllTopChars(5)
    charDictScaled = ScaleTime.rescaleTime(charDict)
    charScores = SentimentAnalysis.turn_lines_to_score(charDictScaled)
    charScoresInterpolated = Interpolate.interpolate_chars_uniformly(charScores, 100)
    charScoresFiltered = LowPassFilter.lowPassAllChars(charScoresInterpolated, window_ratio=.2)
    charScoresScaled = ScaleScores.scale_all_scores(charScoresFiltered)
    train, test = getTrainTestSplit(charScoresScaled, numTest=1)
    clusters = CharacterKMeans.characterKMeans(train, 5)
    pp = pprint.PrettyPrinter()
    chars = list(zip(*clusters))[1]  # zip() returns an iterator in Python 3
    pp.pprint(chars)
    newWithPredicted = CharacterKMeans.predictCluster(test, list(zip(*clusters))[0],
                                                      charScoresScaled)
    pp.pprint(newWithPredicted)
def __init__(self, df=None):
    # configFilepath = r"C:\Users\ashvin\Desktop\UnderDevelopment\sentimentConfig.csv"
    # df = pd.read_csv(configFilepath)
    # df.set_index('Parameters', inplace=True)
    print('creating Results directory--------> at {}'.format(os.getcwd()))
    try:
        os.makedirs('results')
    except OSError:
        pass  # directory may already exist
    reviews = df.loc['paths', 'Value'].split(',')
    catNum = df.loc['numOfCat', 'Value']
    catTypes = df.loc['catType', 'Value'].split(',')
    sentimentLabel = df.loc['train', 'Value']
    sentiment = sa.SentimentTrain()
    acc, allReviews = sentiment.extract(reviews, catNum, catTypes, sentimentLabel)
    visuals = Visualisation(df=None, target=None)
    visuals.wordCloud(allReviews)
def main():
    # Removing results file from last run
    del_last_results_file()
    # creating object of TwitterClient Class
    api = sa.TwitterClient()
    # calling function to get tweets
    print("Enter search term")
    tweets = api.get_tweets(query=sys.stdin.read(), count=1000)
    # saving data to excel file
    save_data("Training Set", tweets)
    # Preparing input list for classifier
    classifier_input_list = prepare_classifier_input(tweets)
    # Training classifier with already fetched tweets
    # train_classifier(classifier_input_list)
    # Fetch classifier test data from twitter
    print("Enter search term for feeding tweets to test classifier: ")
    test_tweets = api.get_raw_tweets(query=sys.stdin.read(), count=1000)
    # Testing classifier
    classifier_output_list = classify_data(test_tweets, classifier_input_list)
    save_data("Result Set", classifier_output_list)

    # picking positive tweets from tweets
    ptweets = [tweet for tweet in tweets if tweet['sentiment'] == 'positive']
    # percentage of positive tweets
    pos_tweet_percentage = 100 * len(ptweets) / len(tweets)
    print("Positive tweets percentage: {} %".format(pos_tweet_percentage))

    # picking negative tweets from tweets
    ntweets = [tweet for tweet in tweets if tweet['sentiment'] == 'negative']
    # percentage of negative tweets
    neg_tweet_percentage = 100 * len(ntweets) / len(tweets)
    print("Negative tweets percentage: {} %".format(neg_tweet_percentage))

    # percentage of neutral tweets
    print("Neutral tweets percentage: {} %".format(
        100 - pos_tweet_percentage - neg_tweet_percentage))

    # printing first 10 positive tweets
    print("\nPositive tweets: ")
    for tweet in ptweets[:10]:
        print(tweet['text'])

    # printing first 10 negative tweets
    print("\n\nNegative tweets:")
    for tweet in ntweets[:10]:
        print(tweet['text'])
def getCorrectSpelling():
    if request.method != 'POST':
        return json.dumps({
            "Status": "ERROR",
            "DATA": None,
            "Reason": "Only accept POST request"
        })
    if not request.headers['Content-Type'] == 'application/json':
        return json.dumps({
            "Status": "ERROR",
            "DATA": None,
            "Reason": "Only accept Content-Type:application/json"
        })
    if not request.is_json:
        return json.dumps({
            "Status": "ERROR",
            "DATA": None,
            "Reason": 'Expecting json data in the form {"data":"VALUE"}'
        })
    try:
        data = dict(request.json)
        sentence = data["sentence"]
        result = SentimentAnalysis.getResult(sentence)
        return json.dumps({
            "Status": "SUCCESS",
            "DATA": str(result),
            "Reason": ""
        })
    except Exception as e:
        return json.dumps({"Status": "ERROR", "DATA": None, "Reason": str(e)})
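The endpoint above insists on a JSON body and reads a "sentence" key (even though its error hint mentions "data"), so a client call would look roughly like the sketch below; the URL is illustrative.

# Hypothetical client for getCorrectSpelling(); the endpoint URL is an assumption.
import requests

resp = requests.post('http://localhost:5000/getCorrectSpelling',
                     json={'sentence': 'ths sentense has mispellings'},
                     headers={'Content-Type': 'application/json'})
print(resp.json())  # {"Status": "SUCCESS", "DATA": "...", "Reason": ""} on success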
class AnalysisOfSentiment:
    def __init__(self):
        self.obj = SentimentAnalysis()

    # def GetSentiment(self, text):
    #     train = self.obj.GetTrainDataSet()
    #     dict = self.obj.GetDictionaryOfTrainData(train)
    #     t = self.obj.GetSampleDataForTraining(train, dict)
    #     classifier = self.obj.TrainNaiveBayesClassifier(t)
    #     features = self.obj.GetDataFeatures(text, dict)
    #     result = self.obj.GetClassifiedResult(classifier, features)
    #     return result

    def GetVaderSentimentIntensity(self, text):
        result = self.obj.VaderSentimentIntensityAnalyzer(text)
        return result

    def GetTextBlobSentimentAnalyzer(self, text):
        result = self.obj.TextBlobSentimentAnalyzer(text)
        return result

    def GetAzureSentimentAnalyzer(self, text):
        result = self.obj.AzureSentimentAnalyzer(text)
        return result

    def GetStanfordCoreNLPSentimentAnalyzer(self, text):
        result = self.obj.StanfordCoreNLPSentimentAnalyzer(text)
        return result

    def GetGoogleSentimentAnalyzer(self, text):
        result_score, result_magnitude = self.obj.GoogleSentimentAnalyzer(text)
        return result_score, result_magnitude

    def GetIBMWatsonSentimentAnalyzer(self, text):
        result_score, result_label = self.obj.IBMWatsonSentimentAnalyzer(text)
        return result_score, result_label
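The wrapper class exposes one method per backend, so switching analyzers is just a different method call. A usage sketch using only the methods defined above; result formats depend on whichever backends the underlying SentimentAnalysis class actually has configured.

# Sketch of using the AnalysisOfSentiment facade; output formats depend on the wrapped backends.
analysis = AnalysisOfSentiment()
text = "The service was quick and the staff were friendly."

vader_result = analysis.GetVaderSentimentIntensity(text)
textblob_result = analysis.GetTextBlobSentimentAnalyzer(text)
google_score, google_magnitude = analysis.GetGoogleSentimentAnalyzer(text)
print(vader_result, textblob_result, google_score, google_magnitude)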
import os
import sys
import re

from BarrageTool import BarrageTool
from JiebaSegment import jiegSeg
import SentimentAnalysis as localsa

# filePath = 'temp'
# filePath = './highlightClips.log'
DICT_PATH = './dict/'
sa = localsa.SentimentAnalysis(DICT_PATH)


def sentEmotDetect(sentence):
    segResult = jiegSeg(sentence)
    score = sa.sentimentScore(segResult)
    return score


def processFile(localbt, filePath, logPath, subPath, inlPath):
    lineNum = 0
    maxLine = False
    # barrageReStr = r'[0-9\]: \/]+room\[[0-9]+\] uid\([0-9]+\).*\]: (.*)$'
    barrageReStr = r'[0-9\]: \/]+room\[[0-9]+\] uid\([0-9]+\) +event\[[\w ]*\].*\]: (.*)$'
    barrageRe = re.compile(barrageReStr)
    with open(filePath, 'rb') as f:
        for line in f.readlines():
            if maxLine and lineNum > maxLine:
                break
            lineNum += 1
def getnews(symbol):
    return SentimentAnalysis.get_google_news(symbol)
cur = begin
all_topics = {}
cumulativeTopics = []
all_sentiment = {}
cumulativeSentiment = []
if stride is None:
    stride = len(input)
if stride < 1:
    stride = 1
# print("STRIDE:", stride)

# Train the corpus classifier
classifier, tagList = SentimentAnalysis.trainCorpus(corpus)

# Parse the input
for i in range((end - begin) // stride):
    for sentence in input[cur:cur + stride]:
        if "utterance" not in sentence:
            continue
        taggedSentences = contextsummary.posTag(sentence["utterance"])
        speaker = sentence["speaker"]
        # context
        topic = contextsummary.sentenctExtract(taggedSentences)
        for top in topic:
            stride_topics[top] = stride_topics.get(top, 0) + 1
            all_topics[top] = all_topics.get(top, 0) + 1
def index():
    return "<p>Hello World!</p>" + "<p>" + sa.sentiment(
        movie_reviews.raw('pos/cv008_29435.txt')) + "</p>"
# Call function to obtain recent TweetIds from file
USER_LIST = get_recent_tweet_ids_from_file()

# Print each tweet in the stream to the screen
for user in USER_LIST:
    print(user.tsn + ", " + user.tid)

    # Fetch twitter data
    ITERATOR = TWITTER.statuses.user_timeline(screen_name=user.tsn,
                                              since_id=user.tid,
                                              trim_user="******",
                                              exclude_replies="true",
                                              tweet_mode="extended")
    # ITERATOR = TWITTER.statuses.user_timeline(screen_name=user.tsn, since_id=user.tid, exclude_replies="true")

    # Print list of tweets
    for tweet in ITERATOR:
        sentiment_analysis = SentimentAnalysis.get_sentiment_analysis(
            tweet['full_text'])
        # Twitter Python Tool wraps the data returned by Twitter
        # as a TwitterDictResponse object.
        # We convert it back to the JSON format to print/score
        print(json.dumps(tweet['id']))
        # print(json.dumps(tweet['user']['name']))
        # print(json.dumps(tweet['user']['screen_name']))
        print(json.dumps(tweet['created_at']))
        print(json.dumps(tweet['full_text']))
        print(sentiment_analysis)
        print('')
        print('')
    print("Tweet fetch complete.")
    print('')
def main():
    """
    This method runs topic modelling through LDA.
    Note: code adapted from Lecturer Jeffrey Chan
    :return:
    """
    # tweets json filename
    jsonFilename = "uberTweetsUS.json"

    # returns tweetTokens and tweetDates
    tweets = tp.getTweetDf(jsonFilename, type="topic", removeFreqWords=True)

    featureNum = 250  # number of features/words used to describe our documents
    wordNumToDisplay = 20  # number of words to display for each topic
    topicNum = 3  # number of topics to be created

    # Count vectorizer
    tfVectorizer = CountVectorizer(max_df=0.95,
                                   min_df=10,
                                   max_features=featureNum,
                                   lowercase=False,
                                   stop_words=None)

    # Create a term-document matrix
    tf = tfVectorizer.fit_transform(tweets["tweetTokens"])

    # Extract the names of the features - words
    tfFeatureNames = tfVectorizer.get_feature_names()

    # Set seed to allow reproducibility of results
    seed(7777)

    # Implement topic modelling using LDA
    ldaModel = LatentDirichletAllocation(n_components=topicNum,
                                         max_iter=10,
                                         learning_method='online').fit(tf)

    # Print out topics
    display_topics(ldaModel, tfFeatureNames, wordNumToDisplay)

    # The following code was adapted from
    # https://www.machinelearningplus.com/nlp/topic-modeling-python-sklearn-examples/
    # It assigns a topic to each tweet based on the constructed topic model
    # and obtains the overall topic distribution.

    # Obtain LDA model output
    lda_output = ldaModel.transform(tf)
    topicNames = ["Topic" + str(i) for i in range(topicNum)]  # topic names e.g. Topic 0, 1, ...
    tweetNames = ["Tweet" + str(i) for i in range(len(tweets["tweetTokens"]))]  # tweet names e.g. Tweet 0, 1, ...

    # Make a pandas dataframe of the probabilities that each tweet belongs to topic 0, 1 or 2
    tweets_and_topics = pd.DataFrame(np.round(lda_output, 2),
                                     columns=topicNames,
                                     index=tweetNames)

    # Get the dominant topic for each tweet: the topic with the highest probability
    tweet_dominant_topic = np.argmax(tweets_and_topics.values, axis=1)
    tweets_and_topics["dominant_topic"] = tweet_dominant_topic

    # Print overall topic distribution
    print("Topic Distribution")
    df_topic_distribution = tweets_and_topics["dominant_topic"].value_counts().reset_index(
        name="Num Documents")
    df_topic_distribution.columns = ["Topic Number", "Number of Tweets"]
    print(df_topic_distribution)

    # Display word cloud
    displayWordcloud(ldaModel, tfFeatureNames)

    # Apply sentiment analysis to each constructed topic.
    # Re-read the tweets to obtain tweet tokens in a format ready for sentiment analysis.
    tweets = tp.getTweetDf(jsonFilename, removeFreqWords=True)

    # Get sentiments for each tweet
    vaderSentiments = sa.vaderSentimentAnalysis(tweets["tweetTokens"],
                                                printSentiment=False)

    # Prepare data for plotting
    tweet_df = pd.DataFrame({
        "Sentiments": vaderSentiments,
        "Date": tweets["tweetDates"],
        "DominantTopic": tweet_dominant_topic
    })
    tweet_df["Sentiments"] = tweet_df["Sentiments"].apply(pd.to_numeric)

    # Distribution of sentiments across topics
    g = ggplot(aes(x='Sentiments'), data=tweet_df) + \
        geom_histogram() + \
        facet_wrap('DominantTopic', nrow=3) + \
        labs(x="Sentiment Score", y="Frequency")
    print(g)
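The sa.vaderSentimentAnalysis call above is only referenced, not shown; a helper with that shape can be built directly on the vaderSentiment package. A minimal sketch under that assumption; the function name and return format here are illustrative, not the module's actual code.

# Illustrative VADER helper; an assumption about what sa.vaderSentimentAnalysis might wrap.
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

def vader_sentiment_analysis(texts, printSentiment=False):
    """Return the VADER compound score (-1..1) for each text in `texts`."""
    analyzer = SentimentIntensityAnalyzer()
    scores = []
    for text in texts:
        compound = analyzer.polarity_scores(text)['compound']
        if printSentiment:
            print(text, compound)
        scores.append(compound)
    return scores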
def getSentimentPositivityOf(entry):
    return SentimentAnalysis.sentiment_analysis(entry)
def model():
    df = SA.run()
    sns.countplot(x='Decisions', data=df)

    # Clean the card text and split into train/test sets
    X = []
    sentences = list(df['Carddata'])
    for sen in sentences:
        X.append(preprocess_text(sen))
    y = df['Decisions']
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.20,
                                                        random_state=42)

    # Tokenise and pad the sequences
    tokenizer = Tokenizer(num_words=10000)
    tokenizer.fit_on_texts(X_train)
    X_train = tokenizer.texts_to_sequences(X_train)
    X_test = tokenizer.texts_to_sequences(X_test)

    vocab_size = len(tokenizer.word_index) + 1
    maxlen = 300
    X_train = pad_sequences(X_train, padding='post', maxlen=maxlen)
    X_test = pad_sequences(X_test, padding='post', maxlen=maxlen)

    # Load pre-trained GloVe vectors and build the embedding matrix
    embeddings_dictionary = dict()
    glove_file = open('glove.42B.300d.txt', encoding="utf8")
    for line in glove_file:
        records = line.split()
        word = records[0]
        vector_dimensions = np.asarray(records[1:], dtype='float32')
        embeddings_dictionary[word] = vector_dimensions
    glove_file.close()

    embedding_matrix = np.zeros((vocab_size, 300))
    for word, index in tokenizer.word_index.items():
        embedding_vector = embeddings_dictionary.get(word)
        if embedding_vector is not None:
            embedding_matrix[index] = embedding_vector

    # Frozen embedding layer feeding a single sigmoid output
    model = Sequential()
    embedding_layer = Embedding(vocab_size, 300,
                                weights=[embedding_matrix],
                                input_length=maxlen,
                                trainable=False)
    model.add(embedding_layer)
    model.add(Flatten())
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
    print(model.summary())

    history = model.fit(X_train, y_train,
                        batch_size=128,
                        epochs=6,
                        verbose=1,
                        validation_split=0.2)
    score = model.evaluate(X_test, y_test, verbose=1)
    print("Score:", score[0])
    print("Test Accuracy:", score[1])
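After model() finishes, scoring a new card-data sentence follows the same tokenise/pad path used for training. A minimal sketch, assuming the tokenizer, model, maxlen and preprocess_text from the function above are passed in (they are local to model() as written); the 0.5 cut-off is illustrative.

# Hypothetical inference step reusing the tokenizer/model built in model(); the 0.5 cut-off is an assumption.
def predict_decision(sentence, tokenizer, model, maxlen=300):
    cleaned = preprocess_text(sentence)
    seq = tokenizer.texts_to_sequences([cleaned])
    padded = pad_sequences(seq, padding='post', maxlen=maxlen)
    prob = float(model.predict(padded)[0][0])  # sigmoid output in [0, 1]
    return prob, int(prob >= 0.5)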
def drawSentimentImage(pageSentimentDict, pagesDict, maxfreq):
    pageSentimentDict = SentimentAnalysis.scaling(pageSentimentDict)