Example #1
def classify(text, verbose=False):

    polarity = 'Neutral'

    pos_score, neg_score = senti_classifier.polarity_scores([text])
    neg_score = -neg_score

    if pos_score + neg_score > 0:
        polarity = "Positive"
    elif pos_score + neg_score < 0:
        polarity = "Negative"

    if verbose:
        return {
            'pos_score': pos_score,
            'neg_score': neg_score,
            'polarity': polarity,
            'total_score': pos_score + neg_score
        }

    return polarity
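A minimal driver for this helper might look like the following sketch; it assumes senti_classifier and its bundled corpora are installed, with the import style shown in Example #51:

from senti_classifier import senti_classifier

print(classify('The movie was the worst movie'))               # e.g. Negative
print(classify('It was the best acting ever', verbose=True))   # full score dict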
Example #2
    def post(self):
        json_data = request.get_json(force=True)
        print(json_data)
        if 'sentences' not in json_data:
            return "Nope"
        sentences = json_data['sentences']
        swear_score = 0
        percentage = 0
        for swear in swear_words:
            for single_sentence in sentences:
                if swear in single_sentence:
                    swear_score += 1
        if swear_score:
            gloop = str(sentences)
            words = gloop.split(' ')
            print(len(words))
            percentage = (float(swear_score) / len(words)) * 100.0
            # Quick-and-dirty rounding via string formatting; round() or the
            # decimal module would be cleaner, but this was a hack-day script.
            percentage = float("{:.2f}".format(percentage))

        pos_score, neg_score = senti_classifier.polarity_scores(sentences)
        print(sentences)
        print(pos_score, neg_score)
        return {'positive_score': pos_score, 'negative_score': neg_score,
                'swear_count': swear_score, 'swear_percentage': percentage}
def calculate_score(tweet, polarityDictionary):
    # Greedy longest-phrase matcher: starting at position init, take the
    # longest slice of the tweet already in polarityDictionary; unknown single
    # words are scored with senti_classifier and cached for later calls.
    # specialChar, positive, negative and neutral are module-level constants.
    score = {}
    tweet = [i.lower().strip(specialChar) for i in tweet]
    tweet = [i for i in tweet if i]
    length = len(tweet)
    init = 0
    neutralScore = 0
    while init < length:
        for i in range(init, length):
            flag = 0
            for j in range(length, i, -1):
                phrase = frozenset(tweet[i:j])
                if phrase in polarityDictionary:
                    init = j
                    flag = 1
                    posScore = polarityDictionary[phrase][positive]
                    negScore = polarityDictionary[phrase][negative]
                    neutralScore = polarityDictionary[phrase][neutral]
                    score[phrase] = [posScore, negScore, neutralScore]
                    break
            if flag == 1:
                break
            else:
                posScore, negScore = senti_classifier.polarity_scores([tweet[i]])
                score[frozenset([tweet[i]])] = [posScore, negScore, neutralScore]
                polarityDictionary[frozenset([tweet[i]])] = [posScore, negScore, neutralScore]
    return score, polarityDictionary
def calculateScore(tweet, polarityDictionary):
    score = {}
    tweet = [i.lower().strip(specialChar) for i in tweet]
    tweet = [i for i in tweet if i]
    length = len(tweet)
    init = 0
    neutralScore = 0
    while init < length:
        for i in range(init, length):
            flag = 0
            for j in range(length, i, -1):
                phrase = frozenset(tweet[i:j])
                if phrase in polarityDictionary:
                    init = j
                    flag = 1
                    posScore = polarityDictionary[phrase][positive]
                    negScore = polarityDictionary[phrase][negative]
                    neutralScore = polarityDictionary[phrase][neutral]
                    score[phrase] = [posScore, negScore, neutralScore]
                    break
            if flag == 1:
                break
            else:
                posScore, negScore = senti_classifier.polarity_scores([tweet[i]])
                score[frozenset([tweet[i]])] = [posScore, negScore, neutralScore]
                polarityDictionary[frozenset([tweet[i]])] = [posScore, negScore, neutralScore]
    return score, polarityDictionary
Example #5
def hmatrix_filtered():
    for w in hg.content_filtered_words_tops:
        # Quantity
        hg.content_filtered_words_matrix_tops[w[0]] = {'Qtde' : w[1]}
        # Tagged
        tagged = nltk.pos_tag([w[0]])
        hg.content_filtered_words_matrix_tops[w[0]]['Tag'] = tagged[0][1]
        # Synonym
        if wordnet.synsets(w[0]):
            hg.content_filtered_words_matrix_tops[w[0]]['Synonym'] = wordnet.synsets(w[0])[0].name()
        else:
            hg.content_filtered_words_matrix_tops[w[0]]['Synonym'] = "---"
        # Antonym
        # if wordnet.synsets(w[0]):
        #     hg.content_filtered_words_matrix_tops[w[0]]['Antonym'] = wordnet.antonyms()#(w[0])[0]
        # else:
        #     hg.content_filtered_words_matrix_tops[w[0]]['Antonym'] = "---"
        # Stemmed
        hg.content_filtered_words_matrix_tops[w[0]]['Stemmed'] = ps.stem(w[0])
        # Lemmatized
        lem = lemmatizer.lemmatize(w[0])
        hg.content_filtered_words_matrix_tops[w[0]]['Lemm'] = lem
        # Sentiment Analyzer (polarity_scores expects a list of sentences)
        pos_score, neg_score = senti_classifier.polarity_scores([w[0]])
        hg.content_filtered_words_matrix_tops[w[0]]['Score +'] = pos_score
        hg.content_filtered_words_matrix_tops[w[0]]['Score -'] = neg_score
def sentiment(string):
    sentimentScores = senti_classifier.polarity_scores([string])
    if sentimentScores[0]==sentimentScores[1]:
        return 'neutral'
    elif sentimentScores[0]>sentimentScores[1]:
        return 'pos'
    else:
        return 'neg'
def addaffect(tweets):
    """ returns (tweet, freq, affect value) from (tweet, freq, related tweets) """
    for keyword in tweets:
        pos_score, neg_score = senti_classifier.polarity_scores(keyword[2])
        if pos_score > neg_score:
            keyword[2] = pos_score
        else:
            keyword[2] = neg_score
    return tweets
Example #8
def sentimentTweet(tweet):
    pos_score, neg_score = senti_classifier.polarity_scores([tweet])
    if pos_score > neg_score:
        vote = 1
    elif pos_score < neg_score:
        vote = -1
    else:
        vote = 0
    return vote
def get_sentiment(userinput):
    userinput = userinput.lower()
    if 'yes' in userinput:
        return True
    (pos, neg) = senti_classifier.polarity_scores([userinput])
    if pos > neg:
        return True
    else:
        return False
def calculateScore(tweet):
    score = {}
    tweet = [i.lower().strip(specialChar) for i in tweet]
    neutralScore = 0
    for i in range(len(tweet)):
        posScore, negScore = senti_classifier.polarity_scores([tweet[i]])
        # print(posScore)
        # print(negScore)
        score[frozenset([tweet[i]])] = [posScore, negScore, neutralScore]
    return score
	def bayesSentiment(self, text):
		from nltk.tokenize.punkt import PunktSentenceTokenizer
		from senti_classifier import senti_classifier

		# break up text into sentences
		stzr = PunktSentenceTokenizer()
		sents = stzr.tokenize(text)
		pos_score, neg_score = senti_classifier.polarity_scores(sents)
		#print pos_score, neg_score
		return [pos_score, neg_score]
Example #12
def computeSentiment(text):

    p, n = senti_classifier.polarity_scores(text)

    if p == n:
        return 0
    elif p > n:
        return 1
    else:
        return -1
def calculateScore(tweet):
    score = {}
    tweet = [i.lower().strip(specialChar) for i in tweet]
    neutralScore = 0
    for i in range(len(tweet)):
        posScore, negScore = senti_classifier.polarity_scores([tweet[i]])
        # print(posScore)
        # print(negScore)
        score[frozenset([tweet[i]])] = [posScore, negScore, neutralScore]
    return score
Example #14
def classify_sentences(sents):
    """
    Return list of tuples (sentence, positive, negative)
    """
    results = []
    for sent in sents:
        pos_score, neg_score = senti_classifier.polarity_scores([sent])
        results.append((sent, pos_score, neg_score))

    return results
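A possible call to classify_sentences, reusing the sample sentences from Example #51 (illustrative only):

sents = ['The movie was the worst movie', 'It was the worst acting by the actors']
for sent, pos, neg in classify_sentences(sents):
    print(sent, pos, neg)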
Example #15
def computeSentiment(text):

    p, n = senti_classifier.polarity_scores(text)

    if p == n:
        return 0
    elif p > n:
        return 1
    else:
        return -1
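Illustrative usage of computeSentiment; polarity_scores takes a list of sentences, so a single sentence is wrapped in a list:

print(computeSentiment(['The acting was wonderful']))  # 1, -1 or 0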
def sentiment_score(dataset, sample):
    count = pos_sum = neg_sum = 0
    for sentence in dataset:
        if count % sample == 0:
            pos_score, neg_score = senti_classifier.polarity_scores([sentence])
            # print("pos_score: " + str(pos_score) + "  neg_score: " + str(neg_score))
            pos_sum += pos_score
            neg_sum += neg_score
        count += 1
    sum_val = pos_sum + neg_sum
    # max() guards against division by zero when both sums are zero
    pos_score, neg_score = pos_sum / max(0.0000001, sum_val), neg_sum / max(0.0000001, sum_val)
    return pos_score - neg_score
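A quick driver for sentiment_score (the data and sampling rate are made up for illustration; sample=2 scores every second sentence):

dataset = ['I loved it', 'It was fine', 'I hated the ending', 'Great cast']
print(sentiment_score(dataset, 2))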
Example #17
def sentiment_score(dataset, sample):
    count = net_sum = 0
    for sentence in dataset:
        if count % sample == 0:
            pos_score, neg_score = senti_classifier.polarity_scores([sentence])
            # print("pos_score: " + str(pos_score) + "  neg_score: " + str(neg_score))
            if (pos_score - neg_score) > 0:
                net_sum += 1
        # count every sentence so that every sample-th one gets scored
        count += 1
    length = count // sample + 1
    score = net_sum * 1.0 / length
    return score
Example #18
def getSentiment(url, date, company, num, sentences):
    # print(url)
    # sentences = u.dataFromURL(url)
    # print(type(sentences))
    # sentences = sentences.split(". ")
    pos_score, neg_score = senti_classifier.polarity_scores(sentences)
    f = open("Articles/" + company + date + "_" + str(num) + '.txt', 'w')
    f.write(". ".join(sentences))
    f.close()
    return str(pos_score) + " " + str(neg_score)
Example #19
def analyze_sents(content, blog_title):
    sents = sent_tokenize(content)
    # print(len(sents))
    pos_score, neg_score = senti_classifier.polarity_scores(sents)
    # print(blog_title, pos_score, neg_score)

    global overallPosScore
    overallPosScore += pos_score
    global overallNegScore
    overallNegScore += neg_score
    # print(blog_title, sents, pos_score, neg_score)

    blogwiseScores[blog_title] = pos_score, neg_score
    def on_data(self, data):
        #file = open('C:/Users/anshul/jupyter/sample.txt', 'a')

        all_data = json.loads(data)
        tweet = all_data["text"]
        # polarity_scores expects a list of sentences
        pos, neg = senti_classifier.polarity_scores([tweet])

        print(tweet)
        print(pos, neg)
        time.sleep(0.3)

        #file.write(': ' + str(d))
        #file.write('\n')
        #file.close()

        return True
Example #21
def main():

    fo = open("sentiment_data.txt", "w")

    prev_pos_scores = {}
    prev_neg_scores = {}
    for t in range(418):
        doc = libxml2.parseFile("cams/cams_" + str(t + 1) + ".xml")
        ctxt = doc.xpathNewContext()
        res = ctxt.xpathEval("//specsKey[@name=\"reviewText\"]/text()")
        # print(len(res), "//specsKey[@name=\"reviewText\"]/@name")

        pos_scores = 0
        neg_scores = 0
        num_reviews = 0
        for val in res:
            # split each review into rough sentence/clause units
            vals = val.content.split('.')
            sentences = []
            for valu in vals:
                sentences_temp = valu.split(',')
                for sentence in sentences_temp:
                    sentences.append(sentence)

            tuple_sentences = tuple(sentences)
            if tuple_sentences in prev_pos_scores and tuple_sentences in prev_neg_scores:
                pos_score = prev_pos_scores[tuple_sentences]
                neg_score = prev_neg_scores[tuple_sentences]
            else:
                try:
                    pos_score, neg_score = senti_classifier.polarity_scores(sentences)
                except Exception:
                    pos_score = neg_score = 0
            pos_scores += pos_score
            neg_scores += neg_score
            prev_pos_scores[tuple_sentences] = pos_score
            prev_neg_scores[tuple_sentences] = neg_score
            print(tuple_sentences)
            num_reviews += 1

        if num_reviews == 0:
            fo.write("Document " + str(t + 1) + " : 0 0\n")
            print("Document " + str(t + 1) + " : 0 0")
        else:
            fo.write("Document " + str(t + 1) + " : " + str(pos_scores / num_reviews) + " " + str(neg_scores / num_reviews) + "\n")
            print("Document " + str(t + 1) + " : " + str(pos_scores / num_reviews) + " " + str(neg_scores / num_reviews))

    fo.close()
Example #22
    def computeSentimentScores(self, record, tokenizer):
        """
        record is a dict which must have record['quote_text']. It normally should have record['quote_id'] or record['vin_id']
        tokenizer is a tokenizer with a tokenize method. The unit of analysis (e.g., word, ngram, sentence) is determined by the tokenizer passed in
        """
        self.text = record['quote_text']

        # To allow this to be used with arbitrary inputs
        try:
            self.quoteID = record['quote_id']
        except KeyError:
            try:
                self.quoteID = record['vin_id']
            except KeyError:
                # Make a random ID if none exists
                self.quoteID = 'ID' + str(np.random.rand())

        # Tokenize the text into the appropriate units
        self.tokens = tokenizer.tokenize(self.text)

        # Calc number of tokens in the record
        self.numTokens = len(self.tokens)

        # Calc sentiment scores
        self.pos_score, self.neg_score = senti_classifier.polarity_scores(
            self.tokens)

        # Averages are needed because otherwise the score will vary with number of sentences
        # Average positive sentiment score of the record
        self.avgPos = self.pos_score / self.numTokens

        # Average negative sentiment of the record
        self.avgNeg = (self.neg_score / self.numTokens) * -1

        # Net average sentiment of the record
        self.netSent = self.avgPos + self.avgNeg

        # Objectivity score (from Chris Potts)
        self.obj_score = 1.0 - self.netSent

        # Put the results in a dictionary
        self.scores = dict(quoteID=self.quoteID,
                           avgPos=self.avgPos,
                           avgNeg=self.avgNeg,
                           netSent=self.netSent)

        return self.scores
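A possible invocation of computeSentimentScores; the record fields follow the docstring, the tokenizer is nltk's Punkt sentence tokenizer (as in the bayesSentiment example above), and `scorer` stands in for an instance of the enclosing class:

from nltk.tokenize.punkt import PunktSentenceTokenizer

record = {'quote_id': 'q1', 'quote_text': 'Great service. Terrible food.'}
scores = scorer.computeSentimentScores(record, PunktSentenceTokenizer())
print(scores['netSent'])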
Example #23
File: User.py Project: mohard/inz
    def fetchMyTweets(self, count=0, page=0):
        if self.twitter_api_ is None:
            return
        timeline = []
        if count == 0:
            i = 0
            do_loop = True
            while do_loop:
                # TODO: with small numbers of tweets this fetches them twice
                tmp_timeline = self.twitter_api_.GetUserTimeline(id=self.uid_, count=200, page=i)
                if len(tmp_timeline) == 0 or i == 4:
                    do_loop = False
                timeline.extend(tmp_timeline)
                if len(tmp_timeline) < 100:  # TODO
                    do_loop = False
                i += 1
        else:
            timeline.extend(self.twitter_api_.GetUserTimeline(id=self.uid_, count=count, page=page))

        keyword_extractor = KeywordExtractor()
        for status in timeline:
            status_text = status.GetText()
            # strip links from the text
            status_text = re.sub(r'http.*$', "", status_text)
            # extract hashtags
            pat = re.compile(r"#(\w+)")
            hash_tags = pat.findall(status_text)
            # determine whether the tweet is positive or negative
            if self.context_:
                pos_score, neg_score = senti_classifier.polarity_scores([status_text])
            else:
                pos_score = 0
                neg_score = 0
            context = pos_score - neg_score
            if context > 0:
                context = 1
            elif context < 0:
                context = -1
            # extract keywords from the tweet
            keywords = [x[0] for x in keyword_extractor.extract(status_text)]
            keywords.extend(hash_tags)
            # append the extracted keywords to the list of all of the user's keywords
            keywords_with_context = [(x, context) for x in keywords]
            # print(keywords_with_context[0][0], ",", keywords_with_context[0][1])
            self.all_key_words_.extend(keywords_with_context)
            self.tweets_.append([status_text, hash_tags, keywords, context])
def probTraining(priorScore):
    """Returns a dictionary with the probability of each word being positive, negative, or neutral."""

    wordProb = {}
    for i in priorScore.keys():
        if i:
            wordProb[i] = [0.0, 0.0, 0.0]
            posScore, negScore = senti_classifier.polarity_scores(list(i))
            if priorScore[i] > 0.0:
                wordProb[i][positive] = priorScore[i] / 5.0
                wordProb[i][negative] = negScore
            elif priorScore[i] < 0.0:
                wordProb[i][negative] = -(priorScore[i] / 5.0)
                wordProb[i][positive] = posScore
            else:
                wordProb[i][positive] = posScore
                wordProb[i][negative] = negScore

    return wordProb
Example #25
def probTraining(priorScore):
    '''Builds per-word positive/negative/neutral probabilities from the prior scores.'''
    wordProb = {}
    tweetCount = [0, 0, 0, 0]
    for i in priorScore.keys():
        if i:
            wordProb[i] = [0.0, 0.0, 0.0]
            posScore, negScore = senti_classifier.polarity_scores(list(i))
            if priorScore[i] > 0.0:
                wordProb[i][positive] = priorScore[i] / 5.0
                wordProb[i][negative] = negScore
            elif priorScore[i] < 0.0:
                wordProb[i][negative] = -(priorScore[i] / 5.0)
                wordProb[i][positive] = posScore
            else:
                wordProb[i][positive] = posScore
                wordProb[i][negative] = negScore

    return wordProb
    def computeSentimentScores(self, record, tokenizer):
        """
        record is a dict which must have record['quote_text']. It normally should have record['quote_id'] or record['vin_id']
        tokenizer is a tokenizer with a tokenize method. The unit of analysis (e.g., word, ngram, sentence) is determined by the tokenizer passed in
        """
        self.text = record['quote_text']

        # To allow this to be used with arbitrary inputs
        try:
            self.quoteID = record['quote_id']
        except KeyError:
            try:
                self.quoteID = record['vin_id']
            except KeyError:
                # Make a random ID if none exists
                self.quoteID = 'ID' + str(np.random.rand())

        # Tokenize the text into the appropriate units
        self.tokens = tokenizer.tokenize(self.text)

        # Calc number of tokens in the record
        self.numTokens = len(self.tokens)

        # Calc sentiment scores
        self.pos_score, self.neg_score = senti_classifier.polarity_scores(self.tokens)

        # Averages are needed because otherwise the score will vary with number of sentences
        # Average positive sentiment score of the record
        self.avgPos = self.pos_score / self.numTokens

        # Average negative sentiment of the record
        self.avgNeg = (self.neg_score / self.numTokens) * -1

        # Net average sentiment of the record
        self.netSent = self.avgPos + self.avgNeg

        # Objectivity score (from Chris Potts)
        self.obj_score = 1.0 - self.netSent

        # Put the results in a dictionary
        self.scores = dict(quoteID=self.quoteID, avgPos=self.avgPos, avgNeg=self.avgNeg, netSent=self.netSent)

        return self.scores
def probTraining(priorScore):
    '''Builds per-word positive/negative/neutral probabilities from the prior scores.'''
    wordProb = {}
    tweetCount = [0, 0, 0, 0]
    for i in priorScore.keys():
        if i:
            wordProb[i] = [0.0, 0.0, 0.0]
            posScore, negScore = senti_classifier.polarity_scores(list(i))
            if priorScore[i] > 0.0:
                wordProb[i][positive] = priorScore[i] / 5.0
                wordProb[i][negative] = negScore
            elif priorScore[i] < 0.0:
                wordProb[i][negative] = -(priorScore[i] / 5.0)
                wordProb[i][positive] = posScore
            else:
                wordProb[i][positive] = posScore
                wordProb[i][negative] = negScore

    return wordProb
Example #28
def print_extract(tweets):
    x = PrettyTable(["Date", "Pos", "Neg", "Sentiment", "Text"])
    x.align["Text"] = "l"
    x.set_style(MSWORD_FRIENDLY)

    counter = 0

    for tweet in tweets:
        if "text" in tweet:
            text = tweet["text"]
        else:
            text = ""

        sentiment = classify(text)
        pos_score, neg_score = senti_classifier.polarity_scores([text])
        x.add_row([tweet["created_at_date"], pos_score, neg_score, sentiment, text])
        counter += 1
        print(str(counter) + "/" + str(len(tweets)))

    print(x)
Example #29
def classify_tweet(stemmed_tokens, sentences):
    pos_score, neg_score = senti_classifier.polarity_scores(sentences)
    print(pos_score, neg_score)

    tweet_emotion = 'Neutral'

    emotionCnt = {
        'Happy': 0,
        'Sad': 0,
        'Anger': 0,
        'Fear': 0,
        'Surprise': 0,
        'Disgust': 0
    }

    for token in stemmed_tokens:
        if token in happyList: emotionCnt['Happy'] += 1
        elif token in sadList: emotionCnt['Sad'] += 1
        elif token in angerList: emotionCnt['Anger'] += 1
        elif token in fearList: emotionCnt['Fear'] += 1
        elif token in surpList: emotionCnt['Surprise'] += 1
        elif token in disgList: emotionCnt['Disgust'] += 1
    print('happy count:', emotionCnt['Happy'])
    print('sad count:', emotionCnt['Sad'])
    print('anger count:', emotionCnt['Anger'])
    print('fear count:', emotionCnt['Fear'])
    print('surprise count:', emotionCnt['Surprise'])
    print('disgust count:', emotionCnt['Disgust'])

    max_emotion = max(emotionCnt.keys(), key=(lambda k: emotionCnt[k]))

    if max(emotionCnt.values()) == 0:
        tweet_emotion = 'Neutral'
    elif pos_score > neg_score:
        if max_emotion == 'Happy' or max_emotion == 'Surprise':
            tweet_emotion = max_emotion
    else:
        if max_emotion != 'Happy':
            tweet_emotion = max_emotion

    return tweet_emotion
def probTraining(priorScore):
    """trainFile is a file which contain the traind data is following format
    tokenizedTweet\tpos\tlabel\n it return the dictonary comtaining the prob of word being positive, negative, neutral"""

    wordProb = {}
    tweetCount = [0, 0, 0, 0]
    for i in priorScore.keys():
        if i:
            wordProb[i] = [0.0, 0.0, 0.0]
            posScore, negScore = senti_classifier.polarity_scores(list(i))
            if priorScore[i] > 0.0:
                wordProb[i][positive] = priorScore[i] / 5.0
                wordProb[i][negative] = negScore
            elif priorScore[i] < 0.0:
                wordProb[i][negative] = -(priorScore[i] / 5.0)
                wordProb[i][positive] = posScore
            else:
                wordProb[i][positive] = posScore
                wordProb[i][negative] = negScore

    return wordProb
Example #32
def classify(text, verbose=False):

    polarity = 'Neutral'

    pos_score, neg_score = senti_classifier.polarity_scores([text])
    neg_score = -neg_score

    if pos_score + neg_score > 0:
        polarity = "Positive"

    elif pos_score + neg_score < 0:
        polarity = "Negative"

    if verbose:
        return {
            'pos_score': pos_score,
            'neg_score': neg_score,
            'polarity': polarity,
            'total_score': pos_score + neg_score
        }

    return polarity
def process_files():
    matches = []
    for root, dirnames, filenames in os.walk(DIR_NAME):
        for filename in fnmatch.filter(filenames, MATCH):
            matches.append(os.path.join(root, filename))

    # stop from processing the same files if rerun
    out_f = open(OUT_DIR_NAME + 'nltk_sentiments', 'r')
    for line in out_f.readlines():
        matches.remove(line.split(',')[0])
    out_f.close()

    out_f = open(OUT_DIR_NAME + 'nltk_sentiments', 'a')
    for match in matches:
        print(match)

        f = open(match, 'r')
        text = f.readlines()
        pos_score, neg_score = senti_classifier.polarity_scores(text)
        netScore = pos_score - neg_score
        print(netScore)
        out_f.write(match + ',' + str(netScore) + '\n')
Example #34
def processEntrySentiment(strText, msg):
  negCount = 0
  posCount = 0
  posList = []
  negList = []
  sentenceArray = [] 
  if strText:  
    text = TextBlob(strText)
    count = 0                 
    for sentence in text.sentences:
        sentenceArray.append(str(sentence))
        blob = TextBlob(str(sentence), analyzer=NaiveBayesAnalyzer())
        if blob.sentiment.classification == 'neg':
          negCount += 1
          negList.append(str(sentence))
        else:
          posCount += 1
          posList.append(str(sentence))   
    
    # A second sentiment module, used to confirm results by comparison
    pos_score, neg_score = senti_classifier.polarity_scores(sentenceArray)
    print("Comparative Positive score: " + str(pos_score))
    print("Comparative Negative score: " + str(neg_score))

    print("Negative Indicators: ")
    print(negList)
    print("Positive Indicators: ")
    print(posList)
    if negCount == posCount:
      print("Overall Sentiment: Neutral")
    else:
      if negCount > posCount:
        print("Overall Sentiment: Negative")
      else:
        print("Overall Sentiment: Positive")
  else:
    print(msg)
  return
Example #35
def tweet(request, id):
    import nltk
    from senti_classifier import senti_classifier
    t = TwitterPost.objects.all().filter(id=id).first()
    sentence = t.text
    tokens = nltk.word_tokenize(sentence)
    pos_score, neg_score = senti_classifier.polarity_scores([t.text])
    tagged = nltk.pos_tag(tokens)
    import os
    from nltk.tree import Tree
    from nltk.draw.tree import TreeView
    tr = Tree.fromstring('(S (NP this tree) (VP (V is) (AdjP pretty)))')
    TreeView(tr)._cframe.print_to_file('output.ps')
    os.system('convert output.ps output.png')
    os.system('cp output.png static/tree.png')
    entities = nltk.chunk.ne_chunk(tagged)

    data = {"twitt": t,
            "tokens": tokens,
            "tags": entities,
            "pos_score": pos_score,
            "neg_score": neg_score}
    return render(request, "tweet.html", data)
Example #36
def getSentiment(pos_score, neg_score):
    # classify a (pos, neg) score pair into a label
    if pos_score > neg_score:
        return "positive"
    elif neg_score > pos_score:
        return "negative"
    elif pos_score == neg_score:
        return "neutral"

def convertDateTime(dt):
    return parser.parse(dt)

client = MongoClient()
db = client.blackhole
tweets_c = db.singularity
sentiment_c = db.sentiment

for post in tweets_c.find({},{'_id':0, 'text':1, 'created_at':1, 'user.location':1}):
    
    dateTime = convertDateTime(post['created_at'])
    text = escapeSpecialCharacters( post['text'], '\'"/\\' )
    pos_score, neg_score = senti_classifier.polarity_scores([text])
    sentiment = getSentiment(pos_score, neg_score)

    doc = { "queryable": "APPLE",
            "pos_score": pos_score,
            "neg_score": neg_score,
            "sentiment": sentiment,
            "dateTime": dateTime }  
    
    sentiment_c.insert(doc)

    print(doc)
Example #37
def sentimentalize(s):
	pos_score, neg_score = senti_classifier.polarity_scores([s])
	return [pos_score, neg_score]
Example #38
def arg_max(iterable):
    return max(enumerate(iterable), key=lambda x: x[1])[0]


def polarity(index):
    if index == 0:
        return '1'
    elif index == 1:
        return '-1'


def clean(text, exclude):
    return ''.join(ch for ch in text if ch not in exclude)


exclude = set(punctuation)

# Write to output
with open(sys.argv[2], 'w') as fout:
    writer = UnicodeWriter(fout, delimiter='\t')
    # Read dataset
    with open(sys.argv[1], 'r') as fin:
        for l_i, line in enumerate(fin):
            line = to_unicode(line).strip()
            line = clean(line, exclude)
            pos_neg = senti_classifier.polarity_scores([line])
            pol = polarity(arg_max(pos_neg))
            line_number = to_unicode(str(l_i))
            writer.writerow([line_number, pol])
    currentAbstract = []
    currentAbstract.append(row[123]+row[124])
    # string containing title and subjects for each term
    currentSubject = row[1]+row[71]+row[72]+row[73]+row[123]+row[124]
    #string containing current abstract and author abstract
    currentString = row[123]+row[124]
    #get frequency distribution of topic and compare to threshold
    fd = FreqDist(nltk.tokenize.word_tokenize(str.lower(currentSubject)))
    #get frequency of word
    fdTopic =  fd[str.lower(topic)]
    
    if fdTopic > threshold:
        publicationDates.append(row[63])
        print "Processing"
        #get sentiment 
        pos_score, neg_score = senti_classifier.polarity_scores(currentAbstract)
        netScore = pos_score - neg_score
        # append netScore to array
        netScoreList.append(netScore)

        print(row[1])
        if netScore > localMax:
            #get title of article with max positive sentiment
            localMax = netScore
            maxArticleTitle = row[1] + row[63]+str(counter)
        if netScore < localMin:
            #get title of article with min positive sentiment
            localMin = netScore
            minArticleTitle = row[1] + row[63]+str(counter)        

#write to output file
Example #40
def to_unicode(obj, encoding='utf-8'):
    if isinstance(obj, basestring):
        if not isinstance(obj, unicode):
            obj = unicode(obj, encoding)
    return obj

def arg_max(iterable):
    return max(enumerate(iterable), key=lambda x: x[1])[0]

def polarity(index):
    if index == 0:
        return '1'
    elif index == 1:
        return '-1'

def clean(text, exclude):
    return ''.join(ch for ch in text if ch not in exclude)

exclude = set(punctuation)

# Write to output
with open(sys.argv[2], 'w') as fout:
    writer = UnicodeWriter(fout, delimiter='\t')
    # Read dataset
    with open(sys.argv[1], 'r') as fin:
        for l_i, line in enumerate(fin):
            line = to_unicode(line).strip()
            line = clean(line, exclude)
            pos_neg = senti_classifier.polarity_scores([line])
            pol = polarity(arg_max(pos_neg))
            line_number = to_unicode(str(l_i))
            writer.writerow([line_number, pol])
Example #41
    writer = csv.writer(out, delimiter='\t')

inp = open('/home/rohan/Desktop/input/output-1-lem.csv', 'r')
reader = csv.reader(inp)

for row in reader:
    # convert the row to a string, ignoring the square brackets and single quotes
    row_str = str(row)[2:-2]
    row_arr = row_str.split('\\t')  # split the string with tab delimiter
    tweet = row_arr[2]  # 2 is the index of tweet text
    if tweet == '':
        continue
    tweet = [tweet]
    pos, neg = senti_classifier.polarity_scores(tweet)

    if DEBUG:
        print(pos, neg)

    if not DEBUG:
        if pos > neg:
            sentiment = 'positive'
        elif pos < neg:
            sentiment = 'negative'
        else:
            sentiment = 'neutral'
        writer.writerow([
            row_arr[0],  # username
            row_arr[1],  #original tweet
            row_arr[2],  # tweet
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import warnings,sys,os,argparse
from senti_classifier import senti_classifier
with warnings.catch_warnings():
    warnings.filterwarnings("ignore",category=DeprecationWarning)
    import gdata.youtube
    import gdata.youtube.service
import urlparse
youtube_service = gdata.youtube.service.YouTubeService()

def ids_from_urls(youtubeurls = []):
    video_ids = []
    for url in youtubeurls:
        url_data = urlparse.urlparse(url)
        query = urlparse.parse_qs(url_data.query)
        video_ids.append(query["v"][0])
    return video_ids

def comments(youtubeurls = []):
    comments = []
    for vid in ids_from_urls(youtubeurls):
        for comment in youtube_service.GetYouTubeVideoCommentFeed(video_id = vid).entry:
            comments.append(comment.content.text)
    return comments

if __name__ == '__main__':
    youtubeurls = ["http://www.youtube.com/watch?v=u1vASMbEEQc"]
    allcomments = comments(youtubeurls)
    print(senti_classifier.polarity_scores(allcomments))
Example #43
                    i += 1
            if i != 0:
                line_avg = line_avg / i
                sentinent_file.write(str(round(line_avg, 3)) + ", ")
            else:
                sentinent_file.write("0, ")

            line_avg = 0.0
            i = 0
            for sentence in score_min_negative:
                for scores in sentence:
                    line_avg += scores
                    i += 1
            if i != 0:
                line_avg = line_avg / i
                sentinent_file.write(str(round(line_avg, 3)) + ", ")
            else:
                sentinent_file.write("0, ")
            try:
                pos_score, neg_score = senti_classifier.polarity_scores(
                    sentences)
            except Exception as e:
                pos_score = neg_score = 0
            sentinent_file.write(
                str(pos_score) + ", " + str(neg_score) + ", ")

        sentinent_file.close()
        #end of loop tweets

import generate_matrix
subprocess.call(["python", "generate_matrix.py"], shell=False)
Example #44
    def sentiClassfierScore(self, doc):
        pos_score, neg_score = senti_classifier.polarity_scores([doc])
        result = {'pos_score': pos_score, 'neg_score': neg_score}
        return result
def get_api_keys(api_keys):
    #return {'consumer_secret': 'a4ksdf7s, 'consumer'....
    if len([key for key in api_keys.values() if key]) <4:
        keys = open('_twitter.api','r').readlines()
        for line in keys:
            key,val  = [k.strip() for k in line.split('\t') if k]
            api_keys[key] = val
    return api_keys

def twitter_api():
    _api_keys = get_api_keys(api_keys)
    api = twitter.Api()
    api = twitter.Api(consumer_key = _api_keys['consumer_key'],
                      consumer_secret = _api_keys['consumer_secret'],
                      access_token_key = _api_keys['access_token_key'],
                      access_token_secret = _api_keys['access_token_secret'])
    return api
def tweets(usr=None):
    api = twitter_api()
    statuses = api.GetUserTimeline("TheSJFC")
    _tweets = [s.text for s in statuses]
    return _tweets
    
if __name__ == '__main__':
    pos, neg = senti_classifier.polarity_scores(tweets(usr = "******"))
    print(pos, neg)

for row in reader:
    i += 1
    if i == 1:
        continue

    # row_str = str(row)[2:-2] #convert the row to string ignoring the square brackets and single quote
    # row_arr = row_str.split('\\t') # split the string with tab delimiter
    # tweet = row_arr[2] # 2 is the index of tweet text
    row_arr = row
    tweet = row[2]
    if tweet == "" or tweet == prev_tweet:
        continue
    prev_tweet = tweet
    tweet = [tweet]
    pos, neg = senti_classifier.polarity_scores(tweet)
    # pos,neg = 0,0

    if DEBUG:
        print(pos, neg)

    if not DEBUG:
        if pos > neg:
            # sentiment='positive'
            sentiment = 1
            count_pos += 1
        elif pos < neg:
            # sentiment='negative'
            sentiment = -1
            count_neg += 1
        else:
Example #47
def senti():
    for w in hg.content_summary:
        # polarity_scores expects a list of sentences
        pos_score, neg_score = senti_classifier.polarity_scores([w])
        hg.content_summary[w].append({'positive': pos_score})
        hg.content_summary[w].append({'negative': neg_score})
Example #48
import MySQLdb as mdb
from senti_classifier import senti_classifier
#f = open('trial.txt' , 'r')
#sentences = f.read().rstrip()
#sentence_list = list()
#sentence_list.append(sentences)

#pos_score , neg_score = senti_classifier.polarity_scores(sentence_list)
#print pos_score , neg_score
con = mdb.connect('localhost' , 'root' , 'admin' , 'happiness_index')
cur = con.cursor()
cur.execute("SELECT * from geo_tweets")
con.commit()
numrows = int(cur.rowcount)
cur1 = con.cursor()
for x in range(0 , numrows):
    row = cur.fetchone()
    tweet = row[1]
    tweet_list = list()
    tweet_list.append(tweet)
    pos_score , neg_score = senti_classifier.polarity_scores(tweet_list)
    cur1.execute("update geo_tweets SET sent_pos = %s , sent_neg = %s WHERE id = %s" , (pos_score , neg_score , row[0]))
    print "success\n"
    con.commit()
Example #49
def sentimentalize(s):
    pos_score, neg_score = senti_classifier.polarity_scores([s])
    return [pos_score, neg_score]
    def test_against_data(self):
        for r in self.reviews_data['reviews']:
            sentences = [s.strip() for s in r['text'].split(".")]
            pos_score, neg_score = senti_classifier.polarity_scores(sentences)
            self.assertEqual(pos_score, r['pos'])
            self.assertEqual(neg_score, r['neg'])
Example #51
from senti_classifier import senti_classifier
sentences = ['The movie was the worst movie', 'It was the worst acting by the actors']
pos_score, neg_score = senti_classifier.polarity_scores(sentences)
print(pos_score, neg_score)
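The two values returned are cumulative positive and negative scores over all the sentences passed in (Example #22 averages them for this reason). A simple way to reduce them to a label, mirroring several of the examples above:

if pos_score > neg_score:
    label = 'positive'
elif pos_score < neg_score:
    label = 'negative'
else:
    label = 'neutral'
print(label)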
Example #52
"""
0.625
"""
##print happy.neg_score
"""
0.25
"""
##print happy.obj_score

# Works on sentences using senti_classifier
from senti_classifier import senti_classifier

s1 = ['I could only get out of the house twice today']
s2 = ['I got out of the house twice today']
##sentences = ['The movie was the best movie', 'It was the best acting by the actors']
print(s1)
pos_score, neg_score = senti_classifier.polarity_scores(s1)
print('positive:')
print(pos_score)
print('negative: ')
print(neg_score)

print(s2)
pos_score, neg_score = senti_classifier.polarity_scores(s2)
print('positive:')
print(pos_score)
print('negative')
print(neg_score)

##print synsets_scores['peaceful.a.01']['pos']
Example #53
    token = [i for i in token if not i in tweet_remove]
    token = [emoji_pattern.sub(r'', i) for i in token]
    tokens.append(token)

for to in tokens:
    key = ' '.join(to)
    sentences.append(key)

m = {}
scScore = 0
hlScore = 0
ssScore = 0
for i in range(len(sentences)):
    if (i >= 1001):
        break
    pos_score0, neg_score0 = senti_classifier.polarity_scores([sentences[i]])
    if pos_score0 > neg_score0:
        if acc[i] == 1:
            scScore += 1
        m[i] = [1]
    elif pos_score0 < neg_score0:
        if acc[i] == 0:
            scScore += 1
        m[i] = [0]
    else:
        if acc[i] == .5:
            scScore += 1
        m[i] = [.5]

    if hl[i] == "Neutral":
        if acc[i] == .5:
Example #54
import nltk
import glob
import os, sys

#nltk.download()

from senti_classifier import senti_classifier

path = '/home/stonehange/Desktop/txt_sentoken/pos/'

for filename in os.listdir(path):
    # polarity_scores expects a list of sentences, not a filename
    with open(os.path.join(path, filename)) as f:
        sentences = f.readlines()
    pos_score, neg_score = senti_classifier.polarity_scores(sentences)
    print(pos_score, neg_score)

import csv
import re
from senti_classifier import senti_classifier
array = []
allratings = []
allRatings = []
with open('/home/suhas/Downloads/userTable.csv', 'r+') as f:
    with open('/home/suhas/Downloads/user.csv', 'wb') as f1:
        reader = csv.reader(f)
        writer = csv.writer(f1)
        for row in reader:
            #re.sub(r'[^\w]','',row[4])
            array.append(re.sub(r'!""', ' ', row[4]))
        for i in array:
            pos_score, neg_score = senti_classifier.polarity_scores([i])
            sum = pos_score + neg_score
            if (sum == 0):
                pos_percentage = 0
                rating = 0
            else:
                pos_percentage = (pos_score) / (pos_score + neg_score) * 100
                if (pos_percentage >= 85):
                    rating = 5
                elif (pos_percentage >= 70 and pos_percentage < 85):
                    rating = 4
                elif (pos_percentage >= 55 and pos_percentage < 70):
                    rating = 3
                elif (pos_percentage >= 40 and pos_percentage < 55):
                    rating = 2
                elif (pos_percentage >= 25 and pos_percentage < 40):
Example #56
def getScore(post):
    pos_score, neg_score = senti_classifier.polarity_scores([post])
    #print str(pos_score) + " " + str(neg_score)
    return [pos_score, neg_score]