def featureCalcu(text):
    answer = []
    analysis = sentiment(unicode(text))
    value = analysis[0]   # polarity, between -1.0 and +1.0
    degree = analysis[1]  # subjectivity, between 0.0 and 1.0 (not used below)
    length = len(text.split())
    # count the positive and negative terms among the assessed phrases
    pCount = 0
    nCount = 0
    for assessment in analysis.assessments:
        if assessment[1] > 0:
            pCount += 1
        elif assessment[1] < 0:
            nCount += 1

    answer.append(value)

    answer.append(pCount / float(length))
    answer.append(nCount / float(length))
    answer.append(-1)
    answer.append(-1)

    return answer
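
A minimal usage sketch (hypothetical input sentence; assumes Python 2 and from pattern.en import sentiment, as the snippet does):

# Hypothetical call; the two trailing -1 entries are placeholder features.
features = featureCalcu("The plot was wonderful but the acting was horrible.")
print features  # [polarity, positive terms/word count, negative terms/word count, -1, -1]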
Code example #2
File: test_en.py Project: jeexianwu/pattern
 def __test_sentiment(self):
     # Assert < 0 for negative adjectives and > 0 for positive adjectives.
     self.assertTrue(en.sentiment("wonderful")[0] > 0)
     self.assertTrue(en.sentiment("horrible")[0] < 0)
     self.assertTrue(en.sentiment(en.wordnet.synsets("horrible", pos="JJ")[0])[0] < 0)
     self.assertTrue(en.sentiment(en.Text(en.parse("A bad book. Really horrible.")))[0] < 0)
     # Assert that :) and :( are recognized.
     #self.assertTrue(en.sentiment(":)")[0] > 0)
     #self.assertTrue(en.sentiment(":(")[0] < 0)
     # Assert the accuracy of the sentiment analysis.
     # Given are the scores for Pang & Lee's polarity dataset v2.0:
     # http://www.cs.cornell.edu/people/pabo/movie-review-data/
     # The baseline should increase (not decrease) when the algorithm is modified.
     from pattern.db import Datasheet
     from pattern.metrics import test
     reviews = []
     for score, review in Datasheet.load(os.path.join(PATH, "corpora", "polarity-en-pang&lee.csv")):
         reviews.append((review, int(score) > 0))
     A, P, R, F = test(lambda review: en.positive(review), reviews)
     print A, P, R, F
     self.assertTrue(A > 0.74)
     self.assertTrue(P > 0.73)
     self.assertTrue(R > 0.75)
     self.assertTrue(F > 0.74)
     print "pattern.en.sentiment()"
Code example #3
File: answer-test.py Project: seanli310/NLP-project
def binaryhelper(question,ans):
    # get the NOUN, ADJ, ADV, VERB
    imp = ('NOUN', 'ADV', 'VERB','ADJ')
    flag = True
    q = nlp(question.decode('ascii'))
    a = nlp(ans.decode('ascii'))
    qlist = set()
    alist = set()
    pre = ""
    omit = ('well',)
    for token in q:
        if token.tag_ == 'NNP' or (str(token.lemma_) == "not" and (pre == "be" or pre == "do")):
            pre = str(token.lemma_)
            continue
        if str(token.pos_) in imp and not str(token.tag_).startswith('PRP') and str(token.lemma_)!= "do":
            qlist.add(str(token.lemma_))
        pre = str(token.lemma_)
        '''
        tt = ""
        for tmp in token.subtree:
            tt += str(tmp)
        print tt
        '''
            
    for token in a:
        #print token, token.tag_, token.pos_
        if str(token.pos_) in imp:
            alist.add(str(token.lemma_))
    
    #print alist,qlist
    for token in qlist:
        if token not in alist:
            #print token
            return False
    
    for token in alist:
        if token not in qlist:        
            if (token== "not" or sentiment(token) < 0):
                return False
    
    sq, t = sentiment(question)
    sa, t = sentiment(ans)

    if sa == sq:
        #print 'same sentiment'
        return True
    elif sq == 0:
        # neutral question: reject an answer with strong polarity
        if abs(sa) > 0.1:
            print sa
            return False
    elif (float(sa) - float(sq)) / max(sa, sq) - 0.2 >= 0:
        # the answer is markedly more positive than the question
        return False
    return True
Code example #4
	def on_data(self, data):
		try:
			all_data = json.loads(data)
			tweet = all_data["text"]
			print "[Tweet]:\n%s"%tweet
			print "\n[By]: %s"%all_data["user"]["screen_name"]
			print "[Polarity]: %s"%sentiment(tweet)[0]
			print "[Subjectivity]: %s"%sentiment(tweet)[1]
			print "---------------"
			time.sleep(1)
		except Exception,e:
			print str(e)
			return True
Code example #5
File: bydate.py Project: cubecnelson/FYP
    def group_data(self):
        for tweet in self.db.tweets.find({"entities.hashtags.text": re.compile(self.query, re.IGNORECASE)}).sort(
            [("_id", pymongo.DESCENDING)]
        ):
            tweetDate = (
                tweet["created_at"].split(" ")[5]
                + self.month[tweet["created_at"].split(" ")[1]]
                + tweet["created_at"].split(" ")[2]
            )
            myText = re.sub(
                r"\w+:\/{2}[\d\w-]+(\.[\d\w-]+)*(?:(?:\/[^\s/]*))*", "", tweet["text"].replace("#", "").replace("@", "")
            )
            response = math.ceil(sentiment(myText.replace("#", ""))[0] * 10) / 10
            response = int(abs(response * 10))

            nameExist = False
            for name in self.db.collection_names():
                if name == self.query:
                    nameExist = True
            if not nameExist:
                self.db[self.query]  # create collection with hashtag as the name

            result = self.db[self.query].find_one({"Date": tweetDate})
            if result == None:
                data = {"0": 0, "1": 0, "2": 0, "3": 0, "4": 0, "5": 0, "6": 0, "7": 0, "8": 0, "9": 0, "10": 0}

                data[str(int(response))] = data[str(int(response))] + 1.0

                self.db[self.query].insert_one({"_id": tweetDate, "Date": tweetDate, "count": 1, "sentiment": data})
            else:
                data = result["sentiment"]

                data[str(response)] = data[str(response)] + 1.0

                self.db[self.query].update_one({"_id": tweetDate}, {"$inc": {"count": 1}, "$set": {"sentiment": data}})
Code example #6
File: views.py Project: mcmonster/britty_sentiment
def sentiment_analysis():
    file_contents             = request.json["file_contents"]
    include_strongly_negative = request.json["include_strongly_negative"]
    include_strongly_positive = request.json["include_strongly_positive"]

    print "FileContents:", file_contents
    print "IncludeStronglyNegative:", include_strongly_negative
    print "IncludeStronglyPositive:", include_strongly_positive

    sentences = file_contents.split(".")
    results   = ""
    for i in range(len(sentences)):  # 'i' rather than 'iter', to avoid shadowing the builtin
        polarity, subjectivity = sentiment(sentences[i])

        if ((include_strongly_positive and (polarity > 0.7)) or
            (include_strongly_negative and (polarity < -0.7))):
            results += '<div class="entry">'
            results += "<b>File:</b>&nbsp;&nbsp;" + request.json["file"] + "&nbsp;&nbsp;"
            results += "<b>Polarity:</b>&nbsp;&nbsp;" + str(polarity) + "&nbsp;&nbsp;"
            results += "<b>Subjectivity:</b>&nbsp;&nbsp;" + str(subjectivity) + "<br />"
            if i-2 >= 0:
                results += sentences[i-2] + " "
            if i-1 >= 0:
                results += sentences[i-1] + " "
            results += '<span style="color:red; font-size: 150%;">' + sentences[i] + ' </span>'
            if i+1 < len(sentences):
                results += sentences[i+1] + " "
            if i+2 < len(sentences):
                results += sentences[i+2] + " "
            results += "</div><br />"

    return results
Code example #7
File: hashtag_scraper.py Project: bkj/jinsta
def getValues(j):
	latitude,longitude = '',''
	caption = ""

	iid = 'inst_' + j['id']
	if 'caption' in j and j['caption'] is not None:
		#print j['caption']
		caption = j['caption']['text'].replace('\n',' ').replace('\t',' ')
	if 'location' in j and j['location'] is not None:
		if 'latitude' in j['location']:
			latitude = j['location']['latitude']
		if 'longitude' in j['location']:
			longitude = j['location']['longitude']
			#print 'here'
	return [j['user']['username'],
	iid,
	j['link'],
	datetime.datetime.fromtimestamp(float(j['created_time'])).isoformat(),
	str(sentiment(caption)[0]),
	str(False),
	"",
	caption,
	str(latitude),
	str(longitude)
	]

Code example #8
def analyze_sentiment_pattern_lexicon(review, threshold=0.1,
                                      verbose=False):
    # pre-process text
    review = normalize_accented_characters(review)
    review = html_parser.unescape(review)
    review = strip_html(review)
    # analyze sentiment for the text document
    analysis = sentiment(review)
    sentiment_score = round(analysis[0], 2)
    sentiment_subjectivity = round(analysis[1], 2)
    # get final sentiment
    final_sentiment = 'positive' if sentiment_score >= threshold\
                                   else 'negative'
    if verbose:
        # display detailed sentiment statistics
        sentiment_frame = pd.DataFrame([[final_sentiment, sentiment_score,
                                        sentiment_subjectivity]],
                                        columns=pd.MultiIndex(levels=[['SENTIMENT STATS:'], 
                                                                      ['Predicted Sentiment', 'Polarity Score',
                                                                       'Subjectivity Score']], 
                                                              labels=[[0,0,0],[0,1,2]]))
        print sentiment_frame
        assessment = analysis.assessments
        assessment_frame = pd.DataFrame(assessment, 
                                        columns=pd.MultiIndex(levels=[['DETAILED ASSESSMENT STATS:'], 
                                                                      ['Key Terms', 'Polarity Score',
                                                                       'Subjectivity Score', 'Type']], 
                                                              labels=[[0,0,0,0],[0,1,2,3]]))
        print assessment_frame
        print
    
    return final_sentiment                                       
Code example #9
File: doge_poetry.py Project: YehEmily/TextMining
def negative_sentiment(text, n, stanzas):
	""" Return a sad poem... because you didn't give doge a treat...
	"""
	generated_poem = poem(text, n, stanzas)
	poem_sentiment = sentiment(generated_poem)[0]
	if poem_sentiment < 0:
		polarity = poem_sentiment
	else:
		while poem_sentiment >= 0:
			generated_poem = poem(text, n, stanzas) # Continue generating poems until we get one with a negative polarity
			if sentiment(generated_poem)[0] < 0:
				polarity = sentiment(generated_poem)[0]
				break
	response = "doge's poem...\nwas...\nnot gud... because sad doge is still sad...\ntry feeding doge a treat!"
	generated_poem += "happiness rating (-1 to +1): {}\n{}\n".format(polarity, response)
	return generated_poem
Code example #10
File: models.py Project: foxt/ZaraConcurrent
 def get_sentiment_concur(self, tweet):
     '''
     Perform sentiment analysis on a single tweet. Return the scores
     in a comma-separated string (necessary for a process pool)
     '''
     score = sentiment(tweet["text"])
     return "{0},{1}".format(score[0],score[1])
Code example #11
 def getData(self, params):
     if self.now_cache is not None:
         if (self.now_cache + datetime.timedelta(minutes=5)) < datetime.datetime.now():
             self.data_cache = None
             self.today_cache = None
             self.now_cache = None
     if self.data_cache is None:
         tweets = []
         for cand in candidates:
             tweets.append({'tweets': api.user_timeline(cand['user'], count=20), 
                             'name': cand['name'], 
                             'party': cand['party']})
         all_tweets = []
         for tweet_data in tweets:
             name = tweet_data['name']
             party = tweet_data['party']
             for tweet in tweet_data['tweets']:
                 all_tweets.append( {'Name': name,
                                     'Tweet': tweet.text, 
                                     'Favorites': tweet.favorite_count, 
                                     'Retweets': tweet.retweet_count} )
         dfs = pd.DataFrame(all_tweets)
         sentiments = [sentiment(tweet) for tweet in dfs['Tweet']]
         dfs['Polarity'] = [sent[0] for sent in sentiments]
         dfs['Subjectivity'] = [sent[1] for sent in sentiments]
         modal = [modality(Sentence(parse(tweet, lemmata=True))) for tweet in dfs['Tweet']]
         dfs['Certainty'] = modal
         today = datetime.datetime.now().strftime('%m/%d/%Y, %H:%M')
         now = datetime.datetime.now()
         self.data_cache = dfs
         self.today_cache = today
         self.now_cache = now
     return self.data_cache
Code example #12
File: doge_poetry.py Project: YehEmily/TextMining
def positive_sentiment(text, n, stanzas):
	""" Return an AWESOME poem! Because you gave doge a treat!
	"""
	generated_poem = poem(text, n, stanzas)
	poem_sentiment = sentiment(generated_poem)[0]
	if poem_sentiment > 0:
		polarity = poem_sentiment
	else:
		while poem_sentiment <= 0:
			generated_poem = poem(text, n, stanzas) # Continue generating poems until we get one with a positive polarity
			if sentiment(generated_poem)[0] > 0:
				polarity = sentiment(generated_poem)[0]
				break
	response = "doge's poem... was... AWESOME!!!!!! yayayayay"
	generated_poem += "happiness rating (-1 to +1): {}\n{}\n".format(polarity, response)
	return generated_poem
Code example #13
File: frames.py Project: rsteckel/EDA
def features(sentence):    
    stop = nltk.corpus.stopwords.words('english')
    
    #ptree = parsetree(sentence, relations=True, lemmata=True)
    ptree = parsetree(sentence)
    matches = search('NP', ptree)
    phrases = []
    for match in matches:
        filtered_np = [ word for word in match if word.string.lower() not in stop ]
        if len(filtered_np) > 0:
            phrases.append( filtered_np )
    
    #for sentence in ptree:
    #    for chunk in sentence.chunks:
    #        if chunk.type == 'NP':
    #            print [(w.string, w.type) for w in chunk.words]
    
    sentence_sentiment = 'NEU'
    sent_result = sentiment(sentence)
    sent = sent_result[0]
    if sent > .1:
        sentence_sentiment  ='POS'
    elif sent < -.1:
        sentence_sentiment  ='NEG'
    
    sentence_subjectivity = 'OBJ'
    if sent_result[1] > .5:
        sentence_subjectivity = 'SUB'
    
    features = {}
    features['NP'] = phrases
    features['SN'] = sentence_sentiment
    features['SUB'] = sentence_subjectivity
    
    return features
Code example #14
def add_sentiment(tdb):
        tweet_db = tdb
        for tweet in tweet_db:
                (polarity, subjectivity) = sentiment(tweet[2])
                tweet.append(polarity)
                tweet.append(subjectivity)
        return tweet_db
Code example #15
 def _get_polarity(self):
     """
     Calculates polarity based on sentiment of a sentence
     """
     self.data['polarity'] = self.data['sentence'].apply(lambda x: [sentiment(i) for i in x])
     polarities = [polarity for sent_polarities in self.data['polarity'].values for polarity in sent_polarities]
     self._get_normalized_score(polarities)
Code example #16
File: sentiment.py Project: wallarelvo/streamy
    def analyze_text(self, text):
        analysis = sentiment(text)
        result = {
            "polarity": analysis[0],
            "subjectivity": analysis[1]
        }

        return result
Code example #17
File: hello.py Project: jgbrainstorm/webservicePy
def index():
    #data = json.loads(request.data)
    #data = request.data # if send in a json string, use request.data.
    comment = request.form['comment'] # if send in from a html form, use request.form
    usrname = request.form['usrname']
    senti = sentiment(comment)[0]
    #msg = data.get('message')
    #msg = 'test'
    return render_template('web_interface.html', comment=comment.strip(), usrname=usrname.strip(),sentiment=senti)
Code example #18
File: models.py Project: foxt/ZaraConcurrent
 def get_sentiment(self, tweets):
     '''
     Perform sentiment analysis on the given dict of tweets
     '''
     scores = []
     for tweet in tweets:
         score = sentiment(tweet["text"])
         scores.append(score)
     return scores
Code example #19
    def Language_Analysis(text):
        """
        Takes a sentence or paragraph and analyzes it with the sentiment module for positivity and subjectivity
        :param text: Sentence or Paragraph
        :type text: string
        :return: (-1.0 to +1.0 polarity, 0.0 to 1.0 subjectivity)
        :rtype: tuple
        """

        return tuple(sentiment(text))
Code example #20
File: views.py Project: youpony/aMuse
def _sentiment(city):
    """
    Return the sentiment for a specific city.
    """
    if not city:
        return .0

    _avg = lambda v: sum(v) / len(v) if len(v) else 0
    return _avg([sentiment(tweet.text)[0] for tweet in
                 twengine.search('#' + city.lower(), count=100, cached=True)])
Code example #21
def SD():
    file_open = open('500-tweets.txt' , 'r').readlines()
    s2=[]
    for line in file_open:
    
        s = sentiment(line)	
        s1 = s[0]
        s2.append((s1-0.046)*(s1-0.046))
    b = sum(s2) 
    print 'Standard Deviation  = ',math.sqrt(b/500)
Code example #22
def getScore(tweets, emotionDict):

	# get the basic Score for text of tweets
	basicScore = sentiment(tweets)[0]

	# store the cumulative emotion score and the number of matched emotion terms
	emotionScore = 0
	emotionNumber = 0

	# traverse the dictionary and look up the corresponding adjectives
	for key in emotionDict.keys():
		if key in tweets:
			emotionScore = emotionScore + sentiment(emotionDict[key])[0]
			emotionNumber += 1

	if emotionNumber == 0:
		return basicScore
	else:
		finalScore = (basicScore + emotionScore / emotionNumber) / 2
		return finalScore
Code example #23
def get_regional_average(query,position):
    global twitter #Eek, but we are at a hackathon
    total = 0  # avoid shadowing the builtin sum()
    n_tweets = 0
    for each_item in twitter.search(q=query,geocode=position)['statuses']:
        tweet = each_item['text']
        polarity = sentiment(tweet)
        total += polarity[0]
        n_tweets += 1
    if n_tweets == 0:
        return 'No tweets for this topic'
    return str(total / n_tweets)
Code example #24
File: Sentiment.py Project: latchireddyambati/api
 def patternSentiScore(self, doc):
     """
     returns (polarity, subjectivity) of a doc as a tuple;
     polarity is a value between -1.0 and +1.0,
     subjectivity between 0.0 and 1.0
     
     """
     
     result = sentiment(doc)
     sentiLabel = self.get_sentiment_class1(doc,lowerBound = -0.65, upperBound=0.65)
     result = {'polarity':result[0],'subjectivity':result[1],'category':sentiLabel}
     return result
Code example #25
def MAD():
    file_open = open('500-tweets.txt' , 'r').readlines()
    s2=[]
    for line in file_open:
    
        s = sentiment(line)	
        s1 = s[0]
        s2.append(abs((s1-0.046)))

    
    print 'MAD =',sum(s2)/500
    print 'MAX = ',max(s2)
Code example #26
 def calc_tweet_sentiment(self, tokenized_tweet):
     tweet_sentiment = 0
     pos_high = 0
     neg_high = 0
     for word in tokenized_tweet:
         sent = sentiment(word)
         tweet_sentiment += sent[0]
         if sent[0] > 0:
             pos_high = max(pos_high, sent[0])
         else:
             neg_high = min(neg_high, sent[0])
     return {"tweet_sentiment":abs(tweet_sentiment),"pos_high": pos_high,"neg_high": abs(neg_high)}
Code example #27
File: main.py Project: jagreene/SoftwareDesign
def sentiment_training(learning_data, fb):
	popularity_data = {}
	profile = fb.profile(id = None)
	#store friends as a metric, used to calculate percentage of likes/comments
	friends = len(fb.search(profile[0], type=FRIENDS, count=10000))
	for status, reaction in (learning_data.iteritems()):
		feeling = 0.0
		#comments are stored in the first area for reaction, unpack it
		comments = reaction[0]
		for comment in comments:
			feeling += sentiment(comment)[0]
		#Add real popularity metric
		if len(comments) > 0:
			feeling /= len(comments)
		#number of likes compared to number of friends and then same with comments
		likePercentage = float(reaction[1])/float(friends)*100
		commentPercentage = float(len(comments))/float(friends)*100

		#like formula to have a more smooth approach, rather than abrupt
		likeMetric = -.025*likePercentage**2 + .374*likePercentage -.227
		#comment formula similar to likeMetric
		commentMetric =-.54*commentPercentage**2+1.639*commentPercentage -.33

		#using a scale from -1 to 1, so make sure it is in the bounds
		if likeMetric > 1:
			likeMetric = 1

		if commentMetric > 1:
			commentMetric = 1

		#take average of the likes, comments, and sentiment
		feeling = (feeling + likeMetric + commentMetric)/3
		#bounds used to decide whether a post was successful or not
		if feeling > .6 :
			popularity = "very popular"
		elif feeling > .1:
			popularity = "popular"
		elif feeling > -.09:
			popularity = "meh"
		elif feeling >  -.59:
			popularity = "not popular"
		else:
			popularity = "bad"

		popularity_data.update({status:popularity})
	#format the data nicely for the machine learning portion
	training_data = [Document(status, type= popularity, stopwords=True) for status, popularity in popularity_data.items()]
	#we used the slp module from pattern as it resulted in more accurate results for bad/good comparisons
	slp = SLP(train=training_data)

	return slp
Code example #28
File: getMood.py Project: PonteIneptique/mobify
def getMood(dico):
	for chapter in dico:
		#Create new key
		dico[chapter]["sentiments"] = {}

		#Get every sentence
		for sentence in dico[chapter]["sentences"]:
			stce = dico[chapter]["sentences"][sentence]
			dico[chapter]["sentiments"][sentence] = sentiment(stce)
		#End sentence loop



	#End chapter loop
Code example #29
File: happy_sad.py Project: gabeos/slackotron
 def _callback(self, channel, user, message):
   message_text_sentiment = sentiment(message.text.lower())
   if message_text_sentiment[1] > self.sentiment_subjectivity_threshold:
     if message_text_sentiment[0] > 0.0:
       return self._response(
           message_text_sentiment[0],
           self.positive_responses
       )
     elif message_text_sentiment[0] < 0.0:
       return self._response(
           message_text_sentiment[0],
           self.negative_responses
       )
   return None
Code example #30
File: stanley.py Project: okoeroo/stanley
    def process(self, parent):
        buf = self.storage.retrieve(index='sentence', type='input', id='1')

        # Quit
        if buf.lower() in {'quit', 'stop', 'exit'}:
            sys.exit(0)

        # Is it a question?
        elif buf.lower().split()[0] in {"who", "where", "when", "why", "what", "which", "how"}:
            self.storage.store('question', index='sentence', type='sentence_type', id='1')

        # Is it a command? Commands begin (first word) with an imperative verb
        elif buf.lower().split()[0] in {"give", "show", "do", "fix", "stop", "start", ""}:
            self.storage.store('command', index='sentence', type='sentence_type', id='1')

        print sentiment(buf)

        s = buf
        s = parse(s,
                tokenize = True,  # Tokenize the input, i.e. split punctuation from words.
                tags = True,  # Find part-of-speech tags.
                chunks = True,  # Find chunk tags, e.g. "the black cat" = NP = noun phrase.
                relations = True,  # Find relations between chunks.
                lemmata = True,  # Find word lemmata.
                light = False)

#        print s

        for sentence in s.split():
            for word_tags in sentence:
                print word_tags
                if 'NNP' in word_tags:
                    print "My name is: " + word_tags[0]
                    self.storage.store(word_tags[0], index='personality', type='name', id='1')

        return
Code example #31
File: data_processing.py Project: IzzySmith/capstone
#sentiment analysis ran on the 10 most similar words
eighteen_words = [i[0] for i in eighteen_child]
print "eighteen words"
print eighteen_words
eighteenforty_words = [i[0] for i in eighteenforty_child]
print "eighteen forty"
print eighteenforty_words
nineteen_words = [i[0] for i in nineteen_child]
print "nineteen words"
print nineteen_words
#nineteenforty_words = [i[0] for i in nineteenforty_child]
twothousand_words = [i[0] for i in twothousand_child]
print "two thousand"
print twothousand_words

eighteen_sentiment_score = sentiment(eighteen_words)
eighteenforty_score = sentiment(eighteenforty_words)
nineteen_score = sentiment(nineteen_words)
#nineteenforty_score = sentiment(nineteen_forty_words)
twothousand_sentiment_score = sentiment(twothousand_words)

print "eighteen sentiment"
print eighteen_sentiment_score

print "eighteen forty score"
print eighteenforty_score

print "nineteen score"
print nineteen_score

#print "nineteenforty score"
Code example #32
print lemma('running')
print conjugate('purred', '3sg')
print PAST in tenses('purred')  # 'p' in tenses() also works.
print(PAST, 1, PL) in tenses('purred')

print 'Quantification'

print quantify(['goose', 'goose', 'duck', 'chicken', 'chicken', 'chicken'])
print quantify('carrot', amount=90)
print quantify({'carrot': 100, 'parrot': 20})

print 'ngrams'
print ngrams("I am eating a pizza.", n=2)

#parse
s = parse('I eat pizza with a fork.')
pprint(s)

#tag
for word, t in tag('The cat felt happy.'):
    print word + ' is ' + t

s = "The movie attempts to be surreal by incorporating various time paradoxes, but it's presented in such a ridiculous way it's seriously boring."
print sentiment(s)
print polarity(s)
print subjectivity(s)

#The modality() function returns a value between -1.0 and +1.0, expressing the degree of certainty
s2 = "Some amino acids tend to be acidic while others may be basic."  # weaseling
se = Sentence(parse(s2, chunks=False, lemmata=True))
print modality(se)
Code example #33
    def getChunks(self,review_text, star, positive_regex, negative_regex, review_id, dictionary_accuracy, resultsWriter):
        #try:
        count_pos = 0
        count_neg = 0
        total_count = 0
        #####
        # Following variables to add data to result csv
        results_csv_row = {}
        results_positive_phrases = []
        results_negative_phrases = []
        results_csv_row["Reviews"] = review_text
        results_csv_row["Stars"] = star

        #####
        review_phrases = []
        detectedPositive = False
        detectedNegative = False
        positiveParser = nltk.RegexpParser(positive_regex)
        negativeParser = nltk.RegexpParser(negative_regex)
        tokenized_reviews = nltk.word_tokenize(review_text)
        POStagged_reviews = nltk.pos_tag(tokenized_reviews)
        chunk_reviews = positiveParser.parse(POStagged_reviews)
        subtrees = chunk_reviews.subtrees()
        positive_score = 0.0
        negative_score = 0.0

        for each_subtree in subtrees:
            if each_subtree.label() == "positive":

                noun_phrase = ""
                (terms, tags) = zip(*each_subtree)
                for i in range(0,len(terms)):
                    noun_phrase = noun_phrase +" " + terms[i]
                polarity_score = Pattern.sentiment(noun_phrase.strip())
                if polarity_score[0] >=(0.2) and polarity_score[1]>=0.5:
                    results_positive_phrases.append(noun_phrase)
                    positive_score += Pattern.sentiment(noun_phrase)[0]
                    total_count += 1
                    #print "Positives:", noun_phrase, ": " ,positive_score

                    detectedPositive = True
        chunk_reviews = negativeParser.parse(POStagged_reviews)
        subtrees = chunk_reviews.subtrees()
        for subtree in subtrees:

            if subtree.label() == 'negative':

                noun_phrase = ""
                (terms, tags) = zip(*subtree)
                for i in range(0,len(terms)):
                    noun_phrase = noun_phrase + " " + terms[i]
                polarity_score = Pattern.sentiment(noun_phrase.strip())
                if polarity_score[0] <=(-0.2) and polarity_score[1]>=0.5:
                    results_negative_phrases.append(noun_phrase)
                    negative_score += Pattern.sentiment(noun_phrase)[0]

                    total_count += 1
                   # print "Negatives:", noun_phrase, ": " ,negative_score
                    detectedNegative = True

        if detectedPositive or detectedNegative:


            sentence_score = (positive_score+negative_score)/2
            self.correlation_vector1.append(sentence_score)
            self.correlation_vector2.append(star)
            dictionary_accuracy[review_id] = ((positive_score+negative_score)/2,star)
            if sentence_score > 0:
                results_csv_row["Positive_Phrases"] = results_positive_phrases
                results_csv_row["Negative_Phrases"] = []
                results_csv_row["Positive_Polarity"] = sentence_score
                results_csv_row["Negative_Polarity"] = "NA"
                resultsWriter.writerow(results_csv_row)
                return (results_positive_phrases, "Positive")
            else:
                results_csv_row["Positive_Phrases"] = []
                results_csv_row["Negative_Phrases"] = results_negative_phrases
                results_csv_row["Negative_Polarity"] = sentence_score
                results_csv_row["Positive_Polarity"] = "NA"
                resultsWriter.writerow(results_csv_row)
                return (results_negative_phrases, "Negative")
Code example #34
try:
    table = Datasheet.load(pd("tweets.csv"))
    index = set(table.columns[0])
except:
    table = Datasheet()
    index = set()

engine = Twitter(language="en")

prev = '1071765537749917696'

counter = 0

while counter < 1000:

    counter += 1
    time.sleep(60)
    for tweet in engine.search("#Apple", start=prev, count=10, cached=False):
        print(tweet.id)
        #        print(tweet.text)
        #        print(tweet.date)
        tweet_sentiment = sentiment(tweet.text)
        print(tweet_sentiment)

        if len(table) == 0 or tweet.id not in index:

            table.append([tweet.id, tweet.date, tweet.text, tweet_sentiment])
            index.add(tweet.id)

        prev = tweet.id

table.save(pd("tweets2.csv"))
Code example #35
"""
Python 3.7
"""

# -*- coding: utf-8 -*-

from pattern.en import sentiment


entry = "This is so bad"

'''Introduction to the function'''
print(sentiment(entry))


'''Repurposing the function for our needs'''
#print(sentiment(entry)[0])


'''Let's try a few'''

#caption1 = "OMG this is awesome"
#caption2 = "NOOOO, that's so ugly"
#caption3 = "It's alright"
#
#print("'",caption1,"'" , " Has a sentiment of " , sentiment(caption1)[0], " which is Positive")
#print("'",caption2,"'" , " Has a sentiment of " , sentiment(caption2)[0], " which is Negative")
#print("'",caption3,"'" , " Has a sentiment of " , sentiment(caption3)[0], " which is Neutral")
Code example #36
def sent_pattern(df):
    from pattern.en import sentiment, polarity, subjectivity, positive
    for index, row in df.iterrows():
        print(row['text'], sentiment(row['text']))
        if index >=20:
            break
Code example #37
print chunk.pnp  # PNPChunk parent, or None.
# print chunk.previous(type=None)
# print chunk.next(type=None)
# print chunk.nearest(type='VP')
# propositional noun phrases
pnp = PNPChunk('The cat sat on the mat.',
               words=[],
               type=None,
               role=None,
               relation=None)
print pnp.string  # String of words (Unicode).
print pnp.chunks  # List of Chunk objects.
# print pnp.preposition            # First PP chunk in the PNP.
# sentiment
print sentiment(
    "The movie attempts to be surreal by incorporating various time paradoxes,"
    "but it's presented in such a ridiculous way it's seriously boring.")
print sentiment('Wonderfully awful! :-)').assessments
# mode and modality
s = "Some amino acids tend to be acidic while others may be basic."  # weaseling
s = parse(s, lemmata=True)
s = Sentence(s)
print modality(s)
# wordnet
s = wordnet.synsets('bird')[0]
print 'Definition:', s.gloss  # Definition string.
print '  Synonyms:', s.synonyms  # List of word forms (i.e., synonyms)
print ' Hypernyms:', s.hypernyms()  # returns a list of parent synsets (i.e., more general). Synset (semantic parent).
print ' Hypernyms:', s.hypernyms(recursive=False, depth=None)
print '  Hyponyms:', s.hyponyms()
Code example #38
def test():
    text = 'This is not very good. But I think it is nice.'

    s = sentiment(text)
    print(s.assessments)
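
Each entry in the printed assessments list is a (words, polarity, subjectivity, label) tuple, one per sentiment-bearing phrase; compare the column names in Code example #8 and the indexing in Code example #44.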
Code example #39
File: run.py Project: dunovank/sentiment-analysis
import csv
import sys
import codecs

from pattern.en import sentiment

input_, output_ = str(sys.argv[1]), str(sys.argv[2])

with codecs.open(output_, 'w') as fout:

    writer = csv.writer(fout, delimiter='\t')
    with codecs.open(input_, 'r') as fin:
        for l_i, line in enumerate(fin):
            line = line.strip()
            result = sentiment(line)[0]
            prediction = None
            if result > 0:
                prediction = 1
            elif result < 0:
                prediction = -1
            elif result == 0:
                prediction = 0
            writer.writerow([l_i, prediction])
Code example #40
File: termassociation.py Project: debayanbose/NLP
    try:
        score= max(s1.wup_similarity(s2) for (s1, s2) in product(ss1, ss2))
        return score
    except:
        score= 0
        return score
        
#str1="This has a software unit and hardware unit. It has license, support and services"
theme=["printer"]
lookup=["printing"] # include driver
themescore=[]
for thmlen in range(0,len(theme)):
    score=0
    for lkuplen in range(0,len(lookup)):
        try:
            synword1=wn.synsets(theme[thmlen])
            synword2=wn.synsets(lookup[lkuplen])
            intm_score=max(word1.wup_similarity(word2) for (word1,word2) in product(synword1,synword2))
            score=score + intm_score
        except:
            score=0
            
    themescore.append(score)
print themescore

from pattern.en import sentiment
print sentiment("Valve trying to stop consumers rights on Steam Gamo, on 31 July 2012 - 11:18 PM, said: jehurey, on 31 July 2012 - 11:12 PM, said: Is Valve putting a forced arbitration clause in their EULA? It doesn't really surprise me. Gabe Newell was a former Microsoft bigwig from the Windows heydays. I knew something was up when Randy Pritchford, a few years ago, was saying some rather odd remarks about the Steam service, but you could tell that he didn't want to say something that would get him in trouble. Gamers love Steam because of all these deals. Valve essentially forces developers into an environment in which the prices of their game tanks, and then Valve advertises it and they get all the credit. What do you mean forces the prices of their games to tank? You mean games that are 3+ years old or sell like complete shit - if it was not for steam most of those developers would have never made any money look at how much ArmA sold during the summer sale - game would have not sold otherwise I wonder how much the developer gets after Valve gets their cut It wouldn't surprise me if there isn't some sort of contractual agreement that Valve essentially controls the price of YOUR game after its been on the Steam service for X amount of time. Because we're seeing that after 12-18 months or so, all games tank in price down to $2.50-$9.99 And now Valve wants to control the USER-generated content through Steam. This is why they created DOTA. Because it serves as the vessel. People can create in-game items for a game, they sell it through the Steam marketplace, and Valve gets their cut for being the middle man. There is something very fishy with Steam. Gabe Newell is pissed because he knows that Windows 8 and Mac OS will have closed environments for applications. The App store is a threat to his store ")

        

Code example #41
def patternPolarity(tweet):
    polarity = sentiment(tweet)[0]
    return polarity
        
Code example #42
File: views.py Project: sammienjihia/mwananchi_
                return

            except:
                ret_msg = """Invalid email address. Please send the word volunteer followed by a valid email address to {}.""" \
                    .format(settings.SMS_SHORT_CODE)
                sms_util.send_single_sms(sms_util.format_phone_number(sender),
                                         ret_msg)
                Outbox(phone_number=sms_util.format_phone_number(sender),
                       user=aspirant.user,
                       message=ret_msg,
                       message_type="ACK",
                       is_sent=True,
                       date_sent=datetime.datetime.now()).save()

        else:
            polarity_subjectivity = sentiment(message)
            sms_inbox.polarity = polarity_subjectivity[0]
            sms_inbox.subjectivity = polarity_subjectivity[1]
            if polarity_subjectivity[1] > 0:
                if polarity_subjectivity[0] >= 0.1:
                    sms_inbox.sentiment = 'POSITIVE'
                else:
                    sms_inbox.sentiment = 'NEGATIVE'
            else:
                sms_inbox.sentiment = 'NEUTRAL'
            sms_inbox.message_type = 'CHAT'
            sms_inbox.save()

    except Exception, exp:
        print("{} sending sms.".format(exp))
Code example #43
def _get_sentiment(text, nlp):
    if nlp == 'pattern':
        return sentiment(text)[0]

    sid = SentimentIntensityAnalyzer()
    return sid.polarity_scores(text)['compound']
Code example #44
File: test_en.py Project: xu-hong/pattern
 def test_sentiment_assessment(self):
     # Assert that en.sentiment() has a fine-grained "assessments" property.
     v = en.sentiment("A warm and pleasant day.").assessments
     self.assertTrue(v[1][0] == "pleasant")
     self.assertTrue(v[1][1] > 0)
     print "pattern.en.sentiment().assessments"
Code example #45
 def _sentiment(self, link):
    s = sentiment(self._getPage(link))[0]
    self.sentiments.append(s)
Code example #46
 def test_subjectivity(self):
     # Assert that en.subjectivity() yields en.sentiment()[1].
     s = "A great day!"
     self.assertTrue(en.subjectivity(s) == en.sentiment(s)[1])
     print "pattern.en.subjectivity()"
Code example #47
import os, sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))

from pattern.en import sentiment, polarity, subjectivity, positive
from pattern.db import Datasheet, pprint, pd

# Sentiment analysis (or opinion mining) attempts to determine if
# a text is objective or subjective, positive or negative.
# The sentiment analysis lexicon bundled in Pattern focuses on adjectives.
# It contains adjectives that occur frequently in customer reviews,
# hand-tagged with values for polarity and subjectivity.

# The polarity() function measures positive vs. negative, as a number between -1.0 and +1.0.
# The subjectivity() function measures objective vs. subjective, as a number between 0.0 and 1.0.
# The sentiment() function returns an averaged (polarity, subjectivity)-tuple for a given string.
for word in ("amazing", "horrible", "public"):
    print word, sentiment(word)

text = "The movie attempts to be surreal by incorporating time travel and various time paradoxes, but it's presented in such a ridiculous way it's seriously boring."
print
print sentiment(text)
print
print polarity(text)
print
print subjectivity(text)

# The input string can be:
# - a string,
# - a Synset (see pattern.en.wordnet),
# - a parsed Sentence, Text, Chunk or Word (see pattern.en),
# - a Document (see pattern.vector).
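
A short sketch of those richer input types, mirroring the calls in Code example #2 above (assumes the pattern package is on the path):

from pattern.en import sentiment, parse, Sentence, wordnet

print sentiment(wordnet.synsets("horrible", pos="JJ")[0])         # a Synset
print sentiment(Sentence(parse("A bad book. Really horrible.")))  # a parsed Sentence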
Code example #48
 def test_polarity(self):
     # Assert that en.polarity() yields en.sentiment()[0].
     s = "A great day!"
     self.assertTrue(en.polarity(s) == en.sentiment(s)[0])
     print "pattern.en.polarity()"
Code example #49
import os, sys
sys.path.insert(0, os.path.join("..", ".."))

from pattern.en import sentiment, polarity, subjectivity, positive

# Sentiment analysis (or opinion mining) attempts to determine if
# a text is objective or subjective, positive or negative.
# The sentiment analysis lexicon bundled in Pattern focuses on adjectives.
# It contains adjectives that occur frequently in customer reviews,
# hand-tagged with values for polarity and subjectivity.

# polarity() measures positive vs. negative, as a number between -1.0 and +1.0.
# subjectivity() measures objective vs. subjective, as a number between 0.0 and 1.0.
# sentiment() returns a tuple of (polarity, subjectivity) for a given string.
for word in ("amazing", "horrible", "public"):
    print word, sentiment(word)

print
print sentiment(
    "The movie attempts to be surreal by incorporating time travel and various time paradoxes,"
    "but it's presented in such a ridiculous way it's seriously boring.")

# The input string can also be a Synset, or a parsed Sentence, Text, Chunk or Word.

# positive() returns True if the string's polarity >= threshold.
# The threshold can be lowered or raised,
# but overall for strings with multiple words +0.1 yields the best results.
print
print "good:", positive("good", threshold=0.1)
print " bad:", positive("bad")
print
Code example #50
def mean(lst):
    """calculates mean"""
    return sum(lst) / len(lst)


def stddev(lst):
    """returns the standard deviation of lst"""
    mn = mean(lst)
    variance = sum([(e - mn)**2 for e in lst]) / len(lst)
    return sqrt(variance)


for tweet in db.tweets.find():
    myText = re.sub(r'\w+:\/{2}[\d\w-]+(\.[\d\w-]+)*(?:(?:\/[^\s/]*))*', '',
                    tweet["text"].replace("#", "").replace("@", ""))
    #print tweet["favorite_count"]
    response = math.ceil(sentiment(myText.replace("#", ""))[0] * 1000) / 1000
    if response != 0.0:
        for hashtag in tweet["entities"]["hashtags"]:
            #print hashtag['text'].encode("utf-8")
            if QUERY.lower() in hashtag['text'].lower():
                data.append(response)
                break
            elif QUERY.lower() in tweet['text'].lower():
                data.append(response)
                break

    #except KeyError, UnicodeEncodeError:
    #	print "Error"
print "SD: " + str(stddev(data)) + " " + "MEAN: " + str(mean(data))