Example #1
0
 def test_sentiment(self):
     """Smoke-test pattern.en sentiment polarity and benchmark its accuracy.

     Checks the sign convention for single adjectives, a WordNet synset,
     parsed text and emoticons, then asserts minimum accuracy / precision /
     recall / F1 for the positive class on two Pang & Lee polarity corpora
     (full reviews, then single sentences). Python 2 syntax (print statement).
     """
     # Assert < 0 for negative adjectives and > 0 for positive adjectives.
     self.assertTrue(en.sentiment("wonderful")[0] > 0)
     self.assertTrue(en.sentiment("horrible")[0] < 0)
     self.assertTrue(en.sentiment(en.wordnet.synsets("horrible", pos="JJ")[0])[0] < 0)
     self.assertTrue(en.sentiment(en.Text(en.parse("A bad book. Really horrible.")))[0] < 0)
     # Assert that :) and :( are recognized.
     self.assertTrue(en.sentiment(":)")[0] > 0)
     self.assertTrue(en.sentiment(":(")[0] < 0)
     # Assert the accuracy of the sentiment analysis (for the positive class).
     # Given are the scores for Pang & Lee's polarity dataset v2.0:
     # http://www.cs.cornell.edu/people/pabo/movie-review-data/
     # The baseline should increase (not decrease) when the algorithm is modified.
     from pattern.db import Datasheet
     from pattern.metrics import test
     reviews = []
     for score, review in Datasheet.load(os.path.join(PATH, "corpora", "polarity-en-pang&lee1.csv")):
         reviews.append((review, int(score) > 0))
     A, P, R, F = test(lambda review: en.positive(review), reviews)
     self.assertTrue(A > 0.755)
     self.assertTrue(P > 0.760)
     self.assertTrue(R > 0.747)
     self.assertTrue(F > 0.754)
     # Assert the accuracy of the sentiment analysis on short text (for the positive class).
     # Given are the scores for Pang & Lee's sentence polarity dataset v1.0:
     # http://www.cs.cornell.edu/people/pabo/movie-review-data/
     reviews = []
     for score, review in Datasheet.load(os.path.join(PATH, "corpora", "polarity-en-pang&lee2.csv")):
         reviews.append((review, int(score) > 0))
     A, P, R, F = test(lambda review: en.positive(review), reviews)
     self.assertTrue(A > 0.642)
     self.assertTrue(P > 0.653)
     self.assertTrue(R > 0.607)
     self.assertTrue(F > 0.629)
     print "pattern.en.sentiment()"
def task10():
    """
    Split the tweets in CrimeReport.txt by sentiment.

    Each line of CrimeReport.txt is decoded as a JSON tweet; tweets whose
    text pattern.en.positive() scores at or above the 0.1 polarity
    threshold are written to positive-sentiment-tweets.txt, the rest to
    negative-sentiment-tweets.txt (one JSON object per line).
    :return:
    """
    # Decode every line of the input file up front.
    with open('CrimeReport.txt', 'r') as data_file:
        tweets = [json.loads(line) for line in data_file]

    # Context managers guarantee the output files are closed even if
    # classification raises (the original leaked them on any error).
    with open('positive-sentiment-tweets.txt', 'w+') as pos_file, \
            open('negative-sentiment-tweets.txt', 'w+') as neg_file:
        for tweet in tweets:
            # positive() is True when polarity >= threshold.
            if positive(tweet['text'], threshold=0.1):
                pos_file.write(json.dumps(tweet) + '\n')
            else:
                neg_file.write(json.dumps(tweet) + '\n')
    # Single-argument form prints identically under Python 2 and 3.
    print("Please check the output files:\npositive-sentiment-tweets.txt  negative-sentiment-tweets")
Example #3
0
 def getAFFECT(self, lineLIST):
     """Return the average number of affect-classified words per sentence.

     Every word of every sentence in ``lineLIST`` is lower-cased and
     classified with pattern.en positive() at threshold 0.5; each word
     counts as either positive or negative, so the affect total equals the
     word count and the result is words-per-sentence over the input.

     Bug fixed: the original indexed ``lineLIST[everyLINE]`` with a counter
     that was never incremented, so it tokenized the first paragraph once
     per paragraph instead of walking the list. Also guards against
     division by zero on empty input.
     """
     wordLIST = list()
     positiveCOUNTER = 0
     negativeCOUNTER = 0
     sentencecount = 0
     for everyPARAGRAPH in lineLIST:
         sentences = sent_tokenize(everyPARAGRAPH)
         sentencecount = sentencecount + len(sentences)
         for everysentence in sentences:
             for everyword in word_tokenize(everysentence):
                 lowercaseCHAR = everyword.lower()
                 wordLIST.append(lowercaseCHAR)
                 if positive(lowercaseCHAR, threshold=0.5):
                     positiveCOUNTER = positiveCOUNTER + 1
                 else:
                     negativeCOUNTER = negativeCOUNTER + 1
     # Empty input previously raised ZeroDivisionError.
     if sentencecount == 0:
         return 0.0
     totalAFFECT = float(negativeCOUNTER + positiveCOUNTER)
     return float(totalAFFECT / sentencecount)
Example #4
0
 def test_sentiment_twitter(self):
     """Benchmark sentiment accuracy on the Sanders Twitter corpus.

     Skipped silently when the corpus CSV is absent. Positive + neutral
     tweets form the positive class (polarity >= 0.0).
     """
     sanders = os.path.join(PATH, "corpora", "polarity-en-sanders.csv")
     if os.path.exists(sanders):
         # Assert the accuracy of the sentiment analysis on tweets.
         # Given are the scores for Sanders Twitter Sentiment Corpus:
         # http://www.sananalytics.com/lab/twitter-sentiment/
         # Positive + neutral is taken as polarity >= 0.0,
         # Negative is taken as polarity < 0.0.
         # Since there are a lot of neutral cases,
         # and the algorithm predicts 0.0 by default (i.e., majority class) the results are good.
         # Distinguishing negative from neutral from positive is a much
         # harder task
         from pattern.db import Datasheet
         from pattern.metrics import test
         reviews = []
         # NOTE: ``id`` shadows the builtin in this loop; harmless here.
         for i, id, date, tweet, polarity, topic in Datasheet.load(sanders):
             if polarity != "irrelevant":
                 reviews.append(
                     (tweet, polarity in ("positive", "neutral")))
         A, P, R, F = test(
             lambda review: en.positive(review, threshold=0.0), reviews)
         #print(A, P, R, F)
         self.assertTrue(A > 0.824)
         self.assertTrue(P > 0.879)
         self.assertTrue(R > 0.911)
         self.assertTrue(F > 0.895)
Example #5
0
 def test_sentiment(self):
     """Check sentiment polarity signs and benchmark on Pang & Lee v2.0.

     Python 2 syntax (trailing print statement).
     """
     # Assert < 0 for negative adjectives and > 0 for positive adjectives.
     self.assertTrue(en.sentiment("wonderful")[0] > 0)
     self.assertTrue(en.sentiment("horrible")[0] < 0)
     self.assertTrue(
         en.sentiment(en.wordnet.synsets("horrible", pos="JJ")[0])[0] < 0)
     self.assertTrue(
         en.sentiment(en.Text(en.parse("A bad book. Really horrible.")))[0]
         < 0)
     # Assert the accuracy of the sentiment analysis.
     # Given are the scores for Pang & Lee's polarity dataset v2.0:
     # http://www.cs.cornell.edu/people/pabo/movie-review-data/
     # The baseline should increase (not decrease) when the algorithm is modified.
     from pattern.db import Datasheet
     from pattern.metrics import test
     reviews = []
     for score, review in Datasheet.load(
             os.path.join("corpora", "pang&lee-polarity.txt")):
         reviews.append((review, int(score) > 0))
     A, P, R, F = test(lambda review: en.positive(review), reviews)
     self.assertTrue(A > 0.71)
     self.assertTrue(P > 0.72)
     self.assertTrue(R > 0.70)
     self.assertTrue(F > 0.71)
     print "pattern.en.sentiment()"
Example #6
0
 def test_sentiment_twitter(self):
     """Benchmark sentiment accuracy on the Sanders Twitter corpus.

     Skipped silently when the corpus CSV is absent. Positive + neutral
     tweets form the positive class (polarity >= 0.0).
     """
     sanders = os.path.join(PATH, "corpora", "polarity-en-sanders.csv")
     if os.path.exists(sanders):
         # Assert the accuracy of the sentiment analysis on tweets.
         # Given are the scores for Sanders Twitter Sentiment Corpus:
         # http://www.sananalytics.com/lab/twitter-sentiment/
         # Positive + neutral is taken as polarity >= 0.0,
         # Negative is taken as polarity < 0.0.
         # Since there are a lot of neutral cases,
         # and the algorithm predicts 0.0 by default (i.e., majority class) the results are good.
         # Distinguishing negative from neutral from positive is a much harder task
         from pattern.db import Datasheet
         from pattern.metrics import test
         reviews = []
         # NOTE: ``id`` shadows the builtin in this loop; harmless here.
         for i, id, date, tweet, polarity, topic in Datasheet.load(sanders):
             if polarity != "irrelevant":
                 reviews.append((tweet, polarity
                                 in ("positive", "neutral")))
         A, P, R, F = test(
             lambda review: en.positive(review, threshold=0.0), reviews)
         #print A, P, R, F
         self.assertTrue(A > 0.824)
         self.assertTrue(P > 0.879)
         self.assertTrue(R > 0.911)
         self.assertTrue(F > 0.895)
Example #7
0
 def on_status(self, status):
     """Stream callback: surface the sentiment of each English status.

     Non-English statuses are ignored. With an LED matrix attached the
     verdict is shown as a happy/sad face animation; otherwise the status
     text is printed prefixed with "+" or "-" (Python 2 print statement).
     """
     if status.lang != "en":
         return
     # pattern.en positive() with its default threshold decides the bucket.
     is_positive = positive(status.text)
     if self.led_matrix:
         pixels = Pixels.HAPPY_FACE if is_positive else Pixels.SAD_FACE
         self.led_matrix.run_animation(pixels)
     else:
         print "[%s] %s" % \
                 ("+" if is_positive else "-", status.text)
Example #8
0
def main():
    """Split BERNIE_Sioux_Falls_DATA.txt into positive/negative tweet files.

    Each raw line is classified with pattern.en positive() at a 0.1
    polarity threshold and appended, JSON-encoded, to
    bernie_sioux_falls_positive.txt or bernie_sioux_falls_negative.txt.
    """
    positive_count = 0  # the original's unused ``i`` counter, kept for parity
    # Open every file once; the original leaked the input handle and
    # re-opened the output files for every single line.
    with open('BERNIE_Sioux_Falls_DATA.txt', "r") as source, \
            open('bernie_sioux_falls_positive.txt', "a") as pos_out, \
            open('bernie_sioux_falls_negative.txt', "a") as neg_out:
        for line in source:
            if positive(str(line), threshold=0.1):
                pos_out.write(json.dumps(line) + '\n')
                positive_count += 1
            else:
                neg_out.write(json.dumps(line) + '\n')
Example #9
0
def main():
    """Split Hillary_Boston.txt into positive/negative tweet files.

    Each raw line is classified with pattern.en positive() at a 0.1
    polarity threshold and appended, JSON-encoded, to
    clinton_boston_positive.txt or clinton_boston_negative.txt.
    """
    positive_count = 0  # the original's unused ``i`` counter, kept for parity
    # Open every file once; the original leaked the input handle and
    # re-opened the output files for every single line.
    with open('Hillary_Boston.txt', "r") as source, \
            open('clinton_boston_positive.txt', "a") as pos_out, \
            open('clinton_boston_negative.txt', "a") as neg_out:
        for line in source:
            if positive(str(line), threshold=0.1):
                pos_out.write(json.dumps(line) + '\n')
                positive_count += 1
            else:
                neg_out.write(json.dumps(line) + '\n')
def main():
    """Split Trump_Jacksonville.txt into positive/negative tweet files.

    Each raw line is classified with pattern.en positive() at a 0.1
    polarity threshold and appended, JSON-encoded, to
    trump_jacksonville_positive.txt or trump_jacksonville_negative.txt.
    """
    positive_count = 0  # the original's unused ``i`` counter, kept for parity
    # Open every file once; the original leaked the input handle and
    # re-opened the output files for every single line.
    with open('Trump_Jacksonville.txt', "r") as source, \
            open('trump_jacksonville_positive.txt', "a") as pos_out, \
            open('trump_jacksonville_negative.txt', "a") as neg_out:
        for line in source:
            if positive(str(line), threshold=0.1):
                pos_out.write(json.dumps(line) + '\n')
                positive_count += 1
            else:
                neg_out.write(json.dumps(line) + '\n')
def main():
    """Split Bernie_NYC.txt into positive/negative tweet files.

    Each raw line is classified with pattern.en positive() at a 0.1
    polarity threshold and appended, JSON-encoded, to
    Bernie_Positive_tweets.txt or Bernie_Negative_tweets.txt.
    """
    positive_count = 0  # the original's unused ``i`` counter, kept for parity
    # Open every file once; the original leaked the input handle and
    # re-opened the output files for every single line.
    with open('Bernie_NYC.txt', "r") as source, \
            open('Bernie_Positive_tweets.txt', "a") as pos_out, \
            open('Bernie_Negative_tweets.txt', "a") as neg_out:
        for line in source:
            if positive(str(line), threshold=0.1):
                pos_out.write(json.dumps(line) + '\n')
                positive_count += 1
            else:
                neg_out.write(json.dumps(line) + '\n')
def main():
    """Split CLINTON_MILWAUKEE_DATA.txt into positive/negative tweet files.

    Each raw line is classified with pattern.en positive() at a 0.1
    polarity threshold and appended, JSON-encoded, to
    clinton_milwaukee_positive.txt or clinton_milwaukee_negative.txt.
    """
    positive_count = 0  # the original's unused ``i`` counter, kept for parity
    # Open every file once; the original leaked the input handle and
    # re-opened the output files for every single line.
    with open('CLINTON_MILWAUKEE_DATA.txt', "r") as source, \
            open('clinton_milwaukee_positive.txt', "a") as pos_out, \
            open('clinton_milwaukee_negative.txt', "a") as neg_out:
        for line in source:
            if positive(str(line), threshold=0.1):
                pos_out.write(json.dumps(line) + '\n')
                positive_count += 1
            else:
                neg_out.write(json.dumps(line) + '\n')
Example #13
0
def main():
    """Split TRUMP_DENVER_DATA.txt into positive/negative tweet files.

    Each raw line is classified with pattern.en positive() at a 0.1
    polarity threshold and appended, JSON-encoded, to
    trump_denver_positive.txt or trump_denver_negative.txt.
    """
    positive_count = 0  # the original's unused ``i`` counter, kept for parity
    # Open every file once; the original leaked the input handle and
    # re-opened the output files for every single line.
    with open('TRUMP_DENVER_DATA.txt', "r") as source, \
            open('trump_denver_positive.txt', "a") as pos_out, \
            open('trump_denver_negative.txt', "a") as neg_out:
        for line in source:
            if positive(str(line), threshold=0.1):
                pos_out.write(json.dumps(line) + '\n')
                positive_count += 1
            else:
                neg_out.write(json.dumps(line) + '\n')
Example #14
0
def main():
    """Split CRUZ_SEATTLE_DATA.txt into positive/negative tweet files.

    Each raw line is classified with pattern.en positive() at a 0.1
    polarity threshold and appended, JSON-encoded, to
    cruz_seattle_positive.txt or cruz_seattle_negative.txt.
    """
    positive_count = 0  # the original's unused ``i`` counter, kept for parity
    # Open every file once; the original leaked the input handle and
    # re-opened the output files for every single line.
    with open('CRUZ_SEATTLE_DATA.txt', "r") as source, \
            open('cruz_seattle_positive.txt', "a") as pos_out, \
            open('cruz_seattle_negative.txt', "a") as neg_out:
        for line in source:
            if positive(str(line), threshold=0.1):
                pos_out.write(json.dumps(line) + '\n')
                positive_count += 1
            else:
                neg_out.write(json.dumps(line) + '\n')
def main():
    """Split CLINTON_LITTLE_ROCK_DATA.txt into positive/negative tweet files.

    Each raw line is classified with pattern.en positive() at a 0.1
    polarity threshold and appended, JSON-encoded, to
    clinton_little_rock_positive.txt or clinton_little_rock_negative.txt.
    """
    positive_count = 0  # the original's unused ``i`` counter, kept for parity
    # Open every file once; the original leaked the input handle and
    # re-opened the output files for every single line.
    with open('CLINTON_LITTLE_ROCK_DATA.txt', "r") as source, \
            open('clinton_little_rock_positive.txt', "a") as pos_out, \
            open('clinton_little_rock_negative.txt', "a") as neg_out:
        for line in source:
            if positive(str(line), threshold=0.1):
                pos_out.write(json.dumps(line) + '\n')
                positive_count += 1
            else:
                neg_out.write(json.dumps(line) + '\n')
Example #16
0
def main():
    """Label each tweet in sports.txt and append it to sports_train.txt.

    Every JSON line is re-emitted as a JSON array ``[row_number, label,
    text]`` where label is 1 for positive sentiment and 0 otherwise.
    Prints the final row count between banner lines.
    """
    count = 0
    # Single-argument prints behave identically under Python 2 and 3.
    print("***********************************************************************")
    # Open each file exactly once: the original re-opened sports_train.txt
    # per tweet and could hit an unbound ``fil`` in its final close().
    with open('sports.txt', "r") as tweets_file, \
            open('sports_train.txt', 'a') as train_file:
        for line in tweets_file:
            try:
                # Read in one line of the file, convert it into a json object.
                tweet = json.loads(line.strip())
                tweet_text = tweet['text']
                # NOTE(review): sentiment() returns a (polarity, subjectivity)
                # tuple and the original feeds that tuple to positive() --
                # preserved as-is, but confirm this is intended.
                tweet_assment = sentiment(tweet_text)
                count += 1
                label = 1 if positive(tweet_assment, threshold=0.1) else 0
                train_file.write(json.dumps([count, label, tweet_text]) + '\n')
            except Exception:
                # Skip malformed / non-JSON lines. ``except Exception`` keeps
                # the original best-effort behavior without swallowing
                # KeyboardInterrupt/SystemExit like the bare except did.
                continue
    print("***********************************************************************")
    print(count)
Example #17
0
def task10():
    """Split CrimeReport.txt tweets into positive/negative output files.

    Prints the word-level sentiment assessments of every tweet, then
    appends each tweet (as one JSON line) to positive-sentiment-tweets.txt
    when pattern.en positive() scores its text at or above 0.1, otherwise
    to negative-sentiment-tweets.txt.
    """
    print("TASK 10:")
    # Open each file once instead of re-opening an output file per tweet;
    # the original also left the input file handle unclosed.
    with open('CrimeReport.txt', 'r') as source, \
            open('positive-sentiment-tweets.txt', 'a') as pos_file, \
            open('negative-sentiment-tweets.txt', 'a') as neg_file:
        for line in source:
            tweet = json.loads(line)
            # Show which words drove the polarity score.
            print(sentiment(tweet['text']).assessments)
            if positive(tweet['text'], threshold=0.1):
                pos_file.write(json.dumps(tweet) + '\n')
            else:
                neg_file.write(json.dumps(tweet) + '\n')
Example #18
0
def task10():
    """Partition CrimeReport.txt tweets by sentiment into two JSON files.

    Tweets whose text pattern.en positive() scores at threshold 0.0 are
    collected into positive-entiment-tweets.txt (filename kept exactly as
    written -- note the missing 's'), the rest into
    negative-sentiment-tweets.txt. Each output file holds one JSON array.
    """
    print('Task 10')
    positive_feeds = []
    negative_feeds = []
    with open('CrimeReport.txt') as crime_file:
        for line in crime_file:
            tweet = json.loads(line)
            if pen.positive(tweet['text'], threshold=0.0):
                positive_feeds.append(tweet)
            else:
                negative_feeds.append(tweet)
    # Renamed handles: the originals shadowed the ``file`` builtin.
    with open('positive-entiment-tweets.txt', 'w') as out_file:
        json.dump(positive_feeds, out_file)
    with open('negative-sentiment-tweets.txt', 'w') as out_file:
        json.dump(negative_feeds, out_file)
Example #19
0
 def test_sentiment(self):
     """Check sentiment polarity signs and benchmark on Pang & Lee v2.0.

     Python 2 syntax (trailing print statement).
     """
     # Assert < 0 for negative adjectives and > 0 for positive adjectives.
     self.assertTrue(en.sentiment("wonderful")[0] > 0)
     self.assertTrue(en.sentiment("horrible")[0] < 0)
     self.assertTrue(en.sentiment(en.wordnet.synsets("horrible", pos="JJ")[0])[0] < 0)
     self.assertTrue(en.sentiment(en.Text(en.parse("A bad book. Really horrible.")))[0] < 0)
     # Assert the accuracy of the sentiment analysis.
     # Given are the scores for Pang & Lee's polarity dataset v2.0:
     # http://www.cs.cornell.edu/people/pabo/movie-review-data/
     # The baseline should increase (not decrease) when the algorithm is modified.
     from pattern.db import Datasheet
     from pattern.metrics import test
     reviews = []
     for score, review in Datasheet.load(os.path.join("corpora", "pang&lee-polarity.txt")):
         reviews.append((review, int(score) > 0))
     A, P, R, F = test(lambda review: en.positive(review), reviews)
     self.assertTrue(A > 0.71)
     self.assertTrue(P > 0.72)
     self.assertTrue(R > 0.70)
     self.assertTrue(F > 0.71)
     print "pattern.en.sentiment()"
Example #20
0
def task10():
    """Split removedDuplicates.txt tweets into positive.txt / negative.txt.

    Each tweet's text is classified with pattern.en positive() at a 0.1
    polarity threshold; the tweet is re-serialized as one JSON line in the
    matching output file, with a progress message per tweet.
    """
    print("Answer for Task 10")
    # Read and decode every tweet first (the original leaked this handle).
    with open('removedDuplicates.txt', 'r') as source:
        tweets = [json.loads(line) for line in source]
    # Context managers close the outputs even if classification raises.
    with open('positive.txt', 'w') as pos_file, \
            open('negative.txt', 'w') as neg_file:
        for tweet in tweets:
            # Finding Tweets Threshold value
            if positive(tweet['text'], threshold=0.1):
                print("Wonderful ... Its positive tweets.")
                pos_file.write(json.dumps(tweet) + '\n')
            else:
                print("Awful ... Negative tweets.")
                neg_file.write(json.dumps(tweet) + '\n')
Example #21
0
# Demo script (Python 2 print statements): pattern.en sentiment on a movie
# snippet. The two adjacent string literals below concatenate into one
# argument; sentiment() returns a (polarity, subjectivity) tuple.
print
print sentiment(
    "The movie attempts to be surreal by incorporating time travel and various time paradoxes,"
    "but it's presented in such a ridiculous way it's seriously boring.")

# The input string can be:
# - a string, 
# - a Synset (see pattern.en.wordnet), 
# - a parsed Sentence, Text, Chunk or Word (see pattern.en),
# - a Document (see pattern.vector).

# The positive() function returns True if the string's polarity >= threshold.
# The threshold can be lowered or raised, 
# but overall for strings with multiple words +0.1 yields the best results.
print
print "good:", positive("good", threshold=0.1)
print " bad:", positive("bad")
print

# You can also do sentiment analysis in Dutch or French, 
# it works exactly the same:

#from pattern.nl import sentiment as sentiment_nl
#print "In Dutch:"
#print sentiment_nl("Een onwijs spannend goed boek!")

# You can also use Pattern with SentiWordNet.
# You can get SentiWordNet at: http://sentiwordnet.isti.cnr.it/
# Put the file "SentiWordNet*.txt" in pattern/en/wordnet/
# You can then use Synset.weight() and wordnet.sentiwordnet:
Example #22
0
def task10(filename):
    """Split the tweets in ``filename`` by sentiment.

    Every line of the input file is decoded as a JSON tweet; tweets whose
    text pattern.en positive() scores at or above the 0.1 polarity
    threshold are appended to positive-entiment-tweets.txt, the rest to
    negative-entiment-tweets.txt, one JSON object per line.
    """
    # Decode all tweets up front.
    with open(filename, 'r') as f:
        tweets = [json.loads(line) for line in f]

    # Output filenames. The 'entiment' spelling is preserved byte-for-byte
    # from the original so downstream consumers keep working.
    postiveFilename = "positive-entiment-tweets.txt"
    negativeFilename = "negative-entiment-tweets.txt"

    # Context managers replace the manual open/close pairs and the
    # redundant f.close() the original issued after its ``with`` block.
    with open(postiveFilename, 'a') as p, open(negativeFilename, 'a') as n:
        for tweet in tweets:
            # Polarity >= threshold goes to the positive sentiment file.
            if positive(tweet["text"], threshold=0.1):
                p.write(json.dumps(tweet) + "\n")
            else:
                n.write(json.dumps(tweet) + "\n")

    print("task 10 Done")
	# NOTE(review): fragment -- the enclosing function starts outside this
	# excerpt; annotated in place only, code left byte-identical.
	# Build the output csv files by separating retrieved phrases as either positive or negative
	word_cnt_pos = defaultdict(int)
	word_list = []
	noun_dict_pos = defaultdict(int)
	word_cnt_neg = defaultdict(int)
	noun_dict_neg = defaultdict(int)
	reviewHash = {}
	keyCount = 0
	# Number the reviews 1..N as they come back from Mongo.
	for item in madisonHospitalReviews.find():
		keyCount = keyCount + 1
		reviewHash[keyCount] = item["text"]
	for key, value in reviewHash.iteritems():
		string = value
		tokenized = custom_sent_tokenizer.tokenize(string)
		# Positive reviews feed the positive word/noun-pair counters below.
		if(positive(string,0.1)):
			try:
				words = nltk.word_tokenize(string)
				tagged = nltk.pos_tag(words)
				# Chunk adjective-noun and noun-noun pairs.
				chunkGram = "NP: {<JJ> <NN>|<JJ> <NNS>|<NN> <NNS>}"
				chunkParser = nltk.RegexpParser(chunkGram)
				chunked = chunkParser.parse(tagged)
				for subtree in chunked.subtrees():
					if subtree.label() == 'NP':
						string = subtree.leaves()
						(terms, tags) = zip(*subtree)
						for i in range(0,len(terms)):
							word_list.append(terms[i].lower())
							word_cnt_pos[terms[i].lower()] += 1
						noun_dict_pos[(terms[0].lower(),terms[1].lower())] = 1
						word_list = []
Example #24
0
	
	# NOTE(review): fragment -- enclosing function and the chunk parsers
	# (chunkParser_pos/chunkParser_neg) are defined outside this excerpt.
	for item in sortedIDs:
		for i in charlotteHospitalReviews.find():
			if item == i["text"]:
				sortedCollection.insert(i)
	keyCount = 0;reviewHash = {}
	for item in sortedCollection.find():
		keyCount = keyCount + 1
		reviewHash[keyCount] = item["text"]
	
	count_pos_sent =0
	for key,value in reviewHash.iteritems():
		final_score = 3.0
		count_pos = 0;count_neg = 0;total_count = 0
		review = value;phrase = ""
		# Whole-review sentiment at threshold 0.1.
		if positive(value,0.1) == True:count_pos_sent += 1
		words = nltk.word_tokenize(review)
		tagged = nltk.pos_tag(words)
		try:					
			chunked = chunkParser_pos.parse(tagged)
			for subtree in chunked.subtrees():
				if subtree.label() == 'POS':
					phrase = ""
					(terms, tags) = zip(*subtree)
					for i in range(0,len(terms)):
						phrase = phrase + " " + terms[i]
					# Per-phrase sentiment at the same threshold.
					if positive(phrase.strip(),0.1) == True:count_pos += 1;total_count += 1
			chunked = chunkParser_neg.parse(tagged)
			for subtree in chunked.subtrees():
				if subtree.label() == 'NEG':
					phrase = ""
Example #25
0
# Demo script (Python 2 print statements): pattern.en sentiment basics.
# subjectivity() measures objective vs. subjective, as a number between 0.0 and 1.0.
# sentiment() returns a tuple of (polarity, subjectivity) for a given string.
for word in ("amazing", "horrible", "public"):
    print word, sentiment(word)

print
print sentiment(
    "The movie attempts to be surreal by incorporating time travel and various time paradoxes,"
    "but it's presented in such a ridiculous way it's seriously boring.") 

# The input string can also be a Synset, or a parsed Sentence, Text, Chunk or Word.

# positive() returns True if the string's polarity >= threshold.
# The threshold can be lowered or raised, 
# but overall for strings with multiple words +0.1 yields the best results.
print positive("good", threshold=0.1)
print positive("bad")
print

# You can also do sentiment analysis in Dutch, it works exactly the same:

#from pattern.nl import sentiment as sentiment_nl
#print "In Dutch:"
#print sentiment_nl("Een onwijs spannend goed boek!")

# You can also use Pattern with SentiWordNet.
# You can get SentiWordNet at: http://sentiwordnet.isti.cnr.it/
# Put the file "SentiWordNet*.txt" in pattern/en/wordnet/
# You can then use Synset.weight() and wordnet.sentiwordnet:

#from pattern.en import wordnet, ADJECTIVE
def get_recent_tweets(user, total_n=50):
    """Sample tweets twice via the Twitter search API and update global tallies.

    First pass: up to ``total_n`` geocoded tweets near 44.467186,-73.214804
    (the "unbiassed" sample); second pass: up to ``total_n`` tweets matching
    an elections-related query over a wide US geocode. Both passes paginate
    backwards by lowering ``max_id`` below the oldest id seen.

    Returns the second sample's raw tweet strings (``data.values()``).

    NOTE(review): mutates the module-level counters D, M, N, A, B, C; their
    exact meaning is not visible in this excerpt -- D/C appear to count
    unbiassed tweets / positive ones, N/B "news" tweets, M/A overlap with
    the query sample. Confirm against the rest of the module.
    """
    data = {}
    unbiasseddata = {}
    max_id = None
    total = 0
    n = 0

    global D
    global M
    global N
    global A
    global B
    global C

    while True:
        unbiassedtweets = api.search(geocode="44.467186,-73.214804,9mi",
                                     screen_name=user,
                                     count=total_n,
                                     max_id=max_id)

        if len(unbiassedtweets) == 0: break
        for unbiassedtweet in unbiassedtweets:
            n += 1

            if unbiassedtweet.id not in unbiasseddata:
                D += 1
                # pattern.en positive() at the recommended 0.1 threshold.
                if positive(unbiassedtweet.text, threshold=0.1):
                    C += 1
                unbiasseddata[unbiassedtweet.id] = str(unbiassedtweet)
                if ("news" in unbiassedtweet.text
                        or "News" in unbiassedtweet.text
                        or "NEWS" in unbiassedtweet.text):
                    N += 1
                    # Positive "news" tweets are moved from C into B.
                    if positive(unbiassedtweet.text, threshold=0.1):
                        B += 1
                        C -= 1
            if n >= total_n: break
        # Step below the oldest id seen, presumably so the next page does
        # not repeat tweets -- TODO confirm against the Twitter API docs.
        max_id = min([s.id for s in unbiassedtweets]) - 1
        if n >= total_n: break

    # return unbiasseddata.values()

    total = 0
    n = 0
    while True:
        """
        rawtweets = api.user_timeline(screen_name=user, count=total_n, max_id=max_id)
        """
        """rawtweets=api.geo_search(query="USA", granularity="country")
        """
        query = "Elections OR Clinton OR Donald OR Fake OR Hillary OR Obama OR Russian OR Trump OR Fake news OR news"
        rawtweets = api.search(q=query,
                               geocode="39.8,-95.583068847656,2500km",
                               screen_name=user,
                               count=total_n,
                               max_id=max_id)

        if len(rawtweets) == 0: break
        for rawtweet in rawtweets:
            n += 1

            if rawtweet.id not in data:
                data[rawtweet.id] = str(rawtweet)
                # Tweets appearing in both samples adjust the overlap tallies.
                if rawtweet.id in unbiasseddata:
                    M += 1
                    if positive(rawtweet.text, threshold=0.1):
                        A += 1
                        C -= 1
                        B -= 1
            if n >= total_n: break
        max_id = min([s.id for s in rawtweets]) - 1
        if n >= total_n: break

    return data.values()
 # NOTE(review): fragment -- the enclosing scope starts outside this
 # excerpt; annotated in place only, code left byte-identical.
 word_cnt_pos = defaultdict(int)
 word_list = []
 count = 0
 noun_dict_pos = defaultdict(int)
 word_cnt_neg = defaultdict(int)
 noun_dict_neg = defaultdict(int)
 reviewHash = {}
 keyCount = 0
 # Number the reviews 1..N as they come back from Mongo.
 for item in pittsburghHospitalReviews.find():
     keyCount = keyCount + 1
     reviewHash[keyCount] = item["text"]
 for key, value in reviewHash.iteritems():
     count += 1
     print count
     string = value
     # Positive reviews feed the positive word/noun-pair counters below.
     if (positive(string, 0.1)):
         try:
             words = nltk.word_tokenize(string)
             tagged = nltk.pos_tag(words)
             # Chunk adjective-noun and noun-noun pairs.
             chunkGram = "NP: {<JJ> <NN>|<JJ> <NNS>|<NN> <NNS>}"
             chunkParser = nltk.RegexpParser(chunkGram)
             chunked = chunkParser.parse(tagged)
             for subtree in chunked.subtrees():
                 if subtree.label() == 'NP':
                     string = subtree.leaves()
                     (terms, tags) = zip(*subtree)
                     for i in range(0, len(terms)):
                         word_list.append(terms[i].lower())
                         word_cnt_pos[terms[i].lower()] += 1
                     noun_dict_pos[(terms[0].lower(), terms[1].lower())] = 1
                     word_list = []
Example #28
0
# Demo script (print-function form): pattern.en sentiment on a movie snippet.
# NOTE(review): the demo content below is duplicated verbatim twice in the
# source; both copies are preserved unchanged.
print(
    sentiment(
        "The movie attempts to be surreal by incorporating time travel and various time paradoxes,"
        "but it's presented in such a ridiculous way it's seriously boring."))

# The input string can be:
# - a string,
# - a Synset (see pattern.en.wordnet),
# - a parsed Sentence, Text, Chunk or Word (see pattern.en),
# - a Document (see pattern.vector).

# The positive() function returns True if the string's polarity >= threshold.
# The threshold can be lowered or raised,
# but overall for strings with multiple words +0.1 yields the best results.
print("")
print("good", positive("good", threshold=0.1))
print("bad", positive("bad"))
print("")

# You can also do sentiment analysis in Dutch or French,
# it works exactly the same:

#from pattern.nl import sentiment as sentiment_nl
#print("In Dutch:")
#print(sentiment_nl("Een onwijs spannend goed boek!"))

# You can also use Pattern with SentiWordNet.
# You can get SentiWordNet at: http://sentiwordnet.isti.cnr.it/
# Put the file "SentiWordNet*.txt" in pattern/en/wordnet/
# You can then use Synset.weight() and wordnet.sentiwordnet:
print("")
print(sentiment(
    "The movie attempts to be surreal by incorporating time travel and various time paradoxes,"
    "but it's presented in such a ridiculous way it's seriously boring."))

# The input string can be:
# - a string, 
# - a Synset (see pattern.en.wordnet),
# - a parsed Sentence, Text, Chunk or Word (see pattern.en),
# - a Document (see pattern.vector).

# The positive() function returns True if the string's polarity >= threshold.
# The threshold can be lowered or raised,
# but overall for strings with multiple words +0.1 yields the best results.
print("")
print("good", positive("good", threshold=0.1))
print("bad", positive("bad"))
print("")

# You can also do sentiment analysis in Dutch or French,
# it works exactly the same:

#from pattern.nl import sentiment as sentiment_nl
#print("In Dutch:")
#print(sentiment_nl("Een onwijs spannend goed boek!"))

# You can also use Pattern with SentiWordNet.
# You can get SentiWordNet at: http://sentiwordnet.isti.cnr.it/
# Put the file "SentiWordNet*.txt" in pattern/en/wordnet/
# You can then use Synset.weight() and wordnet.sentiwordnet:
Example #30
0
                        # NOTE(review): fragment -- the enclosing loops and
                        # defs start outside this excerpt; code unchanged.
                        dicWord[nn]=1
            else:
                for w in chunk:
                    if w.type=="JJ":
                       index=c.lower().find(w.string)
                       
                       print index
                      
                       print w
                       # Wrap the adjective occurrence in a span marker;
                       # judge() is defined outside this excerpt.
                       if index>0 and judge(c,index):
                            c=c[:index+len(w.string)]+'</span>'+c[index+len(w.string):]
                            c=c[:index]+'<span class=*JJ* >'+c[index:]
                       
                       #print c

        # Annotate the whole sentence with its sentiment/mood/modality as
        # pseudo-HTML attributes; '*' stands in for '"' (replaced below).
        c='<span class=*sentence* sentiment=*'+str(sentiment(sentence))+'* positive=*'+str(positive(sentence))+'* mood=*'+str(mood(sentence))+'* modality=*'+str(modality(sentence))+'*>'+c+"</span>"
        c=c.replace('"','*')
        v.texts=v.texts+c
        #print c
        #pdb.set_trace()
        #print v.texts            
        
    print v.date
    #print v.nouns
    #print v.texts
    print v.stars
    
    # NOTE(review): SQL built by string concatenation -- injection-prone if
    # any field is untrusted; parameterize when this module is revisited.
    cur.execute('insert into wZwZcte4lcbu51NOzCjWbQ values("'+v.date+'","'+v.user+'","'+v.nouns+'","'+str(v.stars)+'" ,"'+v.texts+'")')
#cur.execute('create table wordfre(word varchar(20) UNIQUE,uid integer)')
cur.close()    
cx.commit()
Example #31
0
# Demo script (Python 2 print statements): pattern.en sentiment on a movie
# snippet; the adjacent string literals concatenate into one argument.
print
print sentiment(
    "The movie attempts to be surreal by incorporating time travel and various time paradoxes,"
    "but it's presented in such a ridiculous way it's seriously boring.")

# The input string can be:
# - a string, 
# - a Synset (see pattern.en.wordnet), 
# - a parsed Sentence, Text, Chunk or Word (see pattern.en),
# - a Document (see pattern.vector).

# The positive() function returns True if the string's polarity >= threshold.
# The threshold can be lowered or raised, 
# but overall for strings with multiple words +0.1 yields the best results.
print
print "good:", positive("good", threshold=0.1)
print " bad:", positive("bad")
print

# You can also do sentiment analysis in Dutch or French, 
# it works exactly the same:

#from pattern.nl import sentiment as sentiment_nl
#print "In Dutch:"
#print sentiment_nl("Een onwijs spannend goed boek!")

# You can also use Pattern with SentiWordNet.
# You can get SentiWordNet at: http://sentiwordnet.isti.cnr.it/
# Put the file "SentiWordNet*.txt" in pattern/en/wordnet/
# You can then use Synset.weight() and wordnet.sentiwordnet:
Example #32
0
# Demo script (Python 2 print statements): pattern.en sentiment basics.
# subjectivity() measures objective vs. subjective, as a number between 0.0 and 1.0.
# sentiment() returns a tuple of (polarity, subjectivity) for a given string.
for word in ("amazing", "horrible", "public"):
    print word, sentiment(word)

print
print sentiment(
    "The movie attempts to be surreal by incorporating time travel and various time paradoxes,"
    "but it's presented in such a ridiculous way it's seriously boring.")

# The input string can also be a Synset, or a parsed Sentence, Text, Chunk or Word.

# positive() returns True if the string's polarity >= threshold.
# The threshold can be lowered or raised,
# but overall for strings with multiple words +0.1 yields the best results.
print positive("good", threshold=0.1)
print positive("bad")
print

# You can also do sentiment analysis in Dutch, it works exactly the same:

#from pattern.nl import sentiment as sentiment_nl
#print "In Dutch:"
#print sentiment_nl("Een onwijs spannend goed boek!")

# You can also use Pattern with SentiWordNet.
# You can get SentiWordNet at: http://sentiwordnet.isti.cnr.it/
# Put the file "SentiWordNet*.txt" in pattern/en/wordnet/
# You can then use Synset.weight() and wordnet.sentiwordnet:

#from pattern.en import wordnet, ADJECTIVE
Example #33
0
#reload(sys)

# when you run this code make sure you put the correct location of file: CrimeReport.txt
# NOTE(review): this excerpt is truncated -- the else-branch at the bottom
# (first-time creation of positive.txt) is cut off mid-statement, and no
# negative-tweet handling is visible here.
in_file = open(
    '/Users/manasgaur/Desktop/MyApp/Chen_Python_work/Data_folder/CrimeReport.txt',
    'r')
tweets = []
for line in in_file:
    tweet = json.loads(line)
    tweets.append(tweet)

in_file.close()

for i in range(len(tweets)):
    val = tweets[i]["text"]
    # pattern.en positive() at the recommended 0.1 threshold.
    if positive(val, threshold=0.1):
        if os.path.isfile('positive.txt'):
            with open('positive.txt', 'a') as pos:
                pos.write("Text")
                json.dump(val, pos)  #pos.write(str(val))
                pos.write('\n')
                pos.write('\n')
                pos.write("Sentiment Assessment:\n")
                # Word-level contributions to the polarity score.
                l = sentiment(val).assessments
                json.dump(l, pos)
                pos.write('\n')
                pos.write('\n')
                #pos.close()
        else:
            in2_file = open('positive.txt', 'w')
            in2_file.write("Text")
Example #34
0
 def test_positive(self):
     """Assert that en.positive() is True for clearly positive text.

     NOTE(review): the progress message below says ``subjectivity()``
     although this test covers positive() -- looks like a copy/paste slip;
     left unchanged because it is runtime output.
     """
     # Assert that en.positive() yields polarity >= 0.1.
     s = "A great day!"
     self.assertTrue(en.positive(s))
     print "pattern.en.subjectivity()"
Example #35
0
from pattern.en import sentiment
from pattern.en import positive
import json

# Classify every tweet in crime.txt with pattern.en positive() at a 0.1
# polarity threshold and append it, JSON-encoded, to the matching output
# file. Context managers replace the original's unclosed per-line open()
# calls; the unused sentiment() intermediate was dropped.
# NOTE(review): the 'nagative' filename typo is preserved byte-for-byte so
# downstream readers of that file keep working.
with open('crime.txt', 'r') as crime_file, \
        open('positive-sentiment-tweets.txt', 'a') as pos_out, \
        open('nagative-sentiment-tweets.txt', 'a') as neg_out:
    for line in crime_file:
        tweet = json.loads(line)
        if positive(tweet['text'], 0.1):
            pos_out.write(json.dumps(tweet) + "\n")
        else:
            neg_out.write(json.dumps(tweet) + "\n")
Example #36
0
 def test_positive(self):
     """Assert that en.positive() is True for clearly positive text.

     NOTE(review): the progress message names ``subjectivity()`` although
     this test covers positive(); left unchanged (runtime output).
     """
     # Assert that en.positive() yields polarity >= 0.1.
     s = "A great day!"
     self.assertTrue(en.positive(s))
     print("pattern.en.subjectivity()")
import os
import json
from pattern.en import positive, sentiment

# Partition the tweets in final_data/washington.txt by sentiment and write
# the two groups to positive/washington.txt and negative/washington.txt.
file_name = 'final_data/washington.txt'
positive_file = 'positive/washington.txt'
negative_file = 'negative/washington.txt'
positive_tweets = []
negative_tweets = []

# Each raw input line counts as one tweet; pattern.en's positive() (default
# threshold) selects the bucket it lands in.
with open(file_name) as in_file:
    for tweet in in_file:
        (positive_tweets if positive(tweet) else negative_tweets).append(tweet)

# Each tweet keeps its own trailing newline from the source file, followed
# by an extra separator newline, matching the original output exactly.
with open(positive_file, 'w') as out_file:
    out_file.writelines(tweet + "\n" for tweet in positive_tweets)

with open(negative_file, 'w') as out_file:
    out_file.writelines(tweet + "\n" for tweet in negative_tweets)