Example #1
 def test_modality(self):
     # Assert -1.0 => +1.0 representing the degree of certainty.
     v = en.modality(en.Sentence(en.parse("I wish it would stop raining.")))
     self.assertTrue(v < 0)
     v = en.modality(
         en.Sentence(en.parse("It will surely stop raining soon.")))
     self.assertTrue(v > 0)
     # Assert the accuracy of the modality algorithm.
     # Given are the scores for the CoNLL-2010 Shared Task 1 Wikipedia uncertainty data:
     # http://www.inf.u-szeged.hu/rgai/conll2010st/tasks.html#task1
     # The baseline should increase (not decrease) when the algorithm is modified.
     from pattern.db import Datasheet
     from pattern.metrics import test
     sentences = []
     for certain, sentence in Datasheet.load(
             os.path.join(PATH, "corpora", "uncertainty-conll2010.csv")):
         sentence = en.parse(sentence, chunks=False, light=True)
         sentence = en.Sentence(sentence)
         sentences.append((sentence, int(certain) > 0))
     A, P, R, F = test(lambda sentence: en.modality(sentence) > 0.5,
                       sentences)
     #print A, P, R, F
     self.assertTrue(A > 0.69)
     self.assertTrue(P > 0.71)
     self.assertTrue(R > 0.64)
     self.assertTrue(F > 0.67)
     print "pattern.en.modality()"
Example #2
 def test_modality(self):
     # Assert -1.0 => +1.0 representing the degree of certainty.
     v = en.modality(en.Sentence(en.parse("I wish it would stop raining.")))
     self.assertTrue(v < 0)
     v = en.modality(
         en.Sentence(en.parse("It will surely stop raining soon.")))
     self.assertTrue(v > 0)
     # Assert the accuracy of the modality algorithm.
     # Given are the scores for the CoNLL-2010 Shared Task 1 Wikipedia uncertainty data:
     # http://www.inf.u-szeged.hu/rgai/conll2010st/tasks.html#task1
     # The baseline should increase (not decrease) when the algorithm is
     # modified.
     from pattern.db import Datasheet
     from pattern.metrics import test
     sentences = []
     for certain, sentence in Datasheet.load(os.path.join(PATH, "corpora", "uncertainty-conll2010.csv")):
         sentence = en.parse(sentence, chunks=False, light=True)
         sentence = en.Sentence(sentence)
         sentences.append((sentence, int(certain) > 0))
     A, P, R, F = test(
         lambda sentence: en.modality(sentence) > 0.5, sentences)
     #print(A, P, R, F)
     self.assertTrue(A > 0.69)
     self.assertTrue(P > 0.72)
     self.assertTrue(R > 0.64)
     self.assertTrue(F > 0.68)
     print("pattern.en.modality()")
Example #3
 def getData(self, params):
     if self.now_cache is not None:
         if (self.now_cache + datetime.timedelta(minutes=5)) < datetime.datetime.now():
             self.data_cache = None
             self.today_cache = None
             self.now_cache = None
     if self.data_cache is None:
         tweets = []
         for cand in candidates:
             tweets.append({'tweets': api.user_timeline(cand['user'], count=20), 
                             'name': cand['name'], 
                             'party': cand['party']})
         all_tweets = []
         for tweet_data in tweets:
             name = tweet_data['name']
             party = tweet_data['party']
             for tweet in tweet_data['tweets']:
                 all_tweets.append( {'Name': name,
                                     'Tweet': tweet.text, 
                                     'Favorites': tweet.favorite_count, 
                                     'Retweets': tweet.retweet_count} )
         dfs = pd.DataFrame(all_tweets)
         sentiments = [sentiment(tweet) for tweet in dfs['Tweet']]
         dfs['Polarity'] = [sent[0] for sent in sentiments]
         dfs['Subjectivity'] = [sent[1] for sent in sentiments]
         modal = [modality(Sentence(parse(tweet, lemmata=True))) for tweet in dfs['Tweet']]
         dfs['Certainty'] = modal
         today = date.strftime(datetime.datetime.now(), format='%m/%d/%Y, %H:%M')
         now = datetime.datetime.now()
         self.data_cache = dfs
         self.today_cache = today
         self.now_cache = now
     return self.data_cache
Example #4
def add_modality(tdb):
        for tweet in tdb:
                s = parse(tweet[2], lemmata=True)
                s = Sentence(s)
                (form, score) = (mood(s), modality(s))
                tweet.extend((form, score))
        return tdb
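# A hypothetical usage sketch for add_modality(); the row layout (tweet text in
# the third column) is an assumption inferred from the tweet[2] access above.
tdb = [["42", "2016-01-01", "It will surely stop raining soon."]]
for row in add_modality(tdb):
    print(row[-2], row[-1])  # appended mood (e.g. 'indicative') and modality score in [-1.0, +1.0]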
Example #5
def team_sentiment_analysis(stats):
	for s in stats.sentences:
		this_sentiment = sentiment(s)
		polarity = float("{0:.2f}".format(this_sentiment[0]))
		subjectivity = float("{0:.2f}".format(this_sentiment[1]))
		polarity_10 = float("{0:.1f}".format(this_sentiment[0]))
		subjectivity_10 = float("{0:.1f}".format(this_sentiment[1]))
		stats.polarity_counts[polarity] += 1
		stats.subjectivity_counts[subjectivity] += 1
		stats.polarity_counts_10s[polarity_10] += 1
		stats.subjectivity_counts_10s[subjectivity_10] += 1

		s = Sentence(parse(s, lemmata=True))
		stats.mood_counts[mood(s)] += 1
		rounded_modality = float("{0:.2f}".format(modality(s)))
		rounded_modality_10 = float("{0:.1f}".format(modality(s)))
		stats.modality_counts[rounded_modality] += 1
		stats.modality_counts_10s[rounded_modality_10] += 1
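# A minimal sketch of the stats container this function assumes; the class and
# field names below are hypothetical, inferred only from the attributes used above.
from collections import Counter

class TeamStats:
	def __init__(self, sentences):
		self.sentences = sentences
		self.polarity_counts = Counter()
		self.subjectivity_counts = Counter()
		self.polarity_counts_10s = Counter()
		self.subjectivity_counts_10s = Counter()
		self.mood_counts = Counter()
		self.modality_counts = Counter()
		self.modality_counts_10s = Counter()

stats = TeamStats(["We will definitely win.", "They might score first."])
team_sentiment_analysis(stats)
print(stats.modality_counts_10s)  # counts of modality scores rounded to one decimal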
Example #6
 def getData(self, params):
     if self.now_cache is not None:
         if (self.now_cache +
                 datetime.timedelta(minutes=5)) < datetime.datetime.now():
             self.data_cache = None
             self.today_cache = None
             self.now_cache = None
     if self.data_cache is None:
         tweets = []
         for cand in candidates:
             tweets.append({
                 'tweets':
                 api.user_timeline(cand['user'], count=20),
                 'name':
                 cand['name'],
                 'party':
                 cand['party']
             })
         all_tweets = []
         for tweet_data in tweets:
             name = tweet_data['name']
             party = tweet_data['party']
             for tweet in tweet_data['tweets']:
                 all_tweets.append({
                     'Name': name,
                     'Tweet': tweet.text,
                     'Favorites': tweet.favorite_count,
                     'Retweets': tweet.retweet_count
                 })
         dfs = pd.DataFrame(all_tweets)
         sentiments = [sentiment(tweet) for tweet in dfs['Tweet']]
         dfs['Polarity'] = [sent[0] for sent in sentiments]
         dfs['Subjectivity'] = [sent[1] for sent in sentiments]
         modal = [
             modality(Sentence(parse(tweet, lemmata=True)))
             for tweet in dfs['Tweet']
         ]
         dfs['Certainty'] = modal
         today = date.strftime(datetime.datetime.now(),
                               format='%m/%d/%Y, %H:%M')
         now = datetime.datetime.now()
         self.data_cache = dfs
         self.today_cache = today
         self.now_cache = now
     return self.data_cache
Example #7
    def calculate_phrase_sentiment(self, phrases):
        # print "Rating phrases sentiment..."
        valence_list = []
        arousal_list = []
        for p in phrases:
            pol = sentiment(p)[0]
            sent = parse(p, lemmata=True)
            mod = modality(Sentence(sent))
            print mod
            valence_list.append(10 * pol)
            arousal_list.append(5 * mod)

        valence = max(valence_list)
        arousal = max(arousal_list)

        print "Valence: " + str(valence)
        print "arousal: " + str(arousal)
        return ((valence, arousal))
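# The same valence/arousal scaling as a standalone sketch, outside the class,
# assuming pattern.en as in the other examples:
from pattern.en import sentiment, parse, Sentence, modality
p = "I will certainly finish this"
valence = 10 * sentiment(p)[0]                            # polarity scaled to roughly [-10, +10]
arousal = 5 * modality(Sentence(parse(p, lemmata=True)))  # modality scaled to roughly [-5, +5]
print(valence, arousal)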
Example #8
def messages():
	#sorts friends by sentiment and modality of their last message to you. Returns rankings as "Friends' Happiness" and "Friends' Confidence"
	graph = GraphAPI(token)
	me = f.profile()
	happiness = {}
	confidence = {}
	snippets = graph.fql('SELECT snippet, snippet_author FROM thread WHERE folder_id = 0 OR folder_id = 1 Limit 10000',3)
	#the above code was heavily influenced by arofcoding.blogspot.com/2012/10/python-script-to-fetch-messages-from.html
	#returns a dictionary of message snippets along with the corresponding facebook friend IDs
	for dictionary in snippets['data']:
	#puts snippets in a dictionary where each author is mapped to the sentiment of their message
		happiness[sentiment(dictionary['snippet'])] = dictionary['snippet_author']
		confidence[modality(dictionary['snippet'])] = dictionary['snippet_author']
	#ranks dictionary entries by positivity of sentiment
	happiness_rankings = rank(happiness)
	confidence_rankings = rank(confidence)
	print "Friends' Happiness (low to high):" 
	print happiness_rankings
	print "Friends' Confidence (low to high):"
	print confidence_rankings
Example #9
 def transform(self, text_fields):
     stats = []
     punctuation = string.punctuation
     abvs = ['CNN', 'FBI', 'ABC', 'MSNBC', 'GOP', 'U.S.', 'US', 'ISIS', 'DNC', 'TV', 'CIA',
             'I', 'AP', 'PM', 'AM', 'EU', 'USA', 'UK', 'UN', 'CEO', 'NASA', 'LGBT', 'LGBTQ', 'NAFTA', 'ACLU']
     for field in text_fields:
         field_stats = {}
         tok_text = nltk.word_tokenize(field)
         try:
             num_upper = float(len([w for w in tok_text if w.isupper() and w not in abvs]))/len(tok_text)
         except:
             num_upper = 0
         try:
             num_punct = float(len([ch for ch in field if ch in punctuation]))/len(field)
         except:
             num_punct = 0   
         try:
             sent_lengths = [len(nltk.word_tokenize(s)) for s in nltk.sent_tokenize(field)]
             av_sent_len = float(sum(sent_lengths))/len(sent_lengths)
         except:
             av_sent_len = 0
         try:
             num_prof = float(len([w for w in tok_text if w.lower() in PROFANITY]))/len(tok_text)
         except:
             num_prof = 0
             
         mood = modality(field)    
         polarity, subjectivity = sentiment(field)
         field_stats['all_caps'] = num_upper
         field_stats['sent_len'] = av_sent_len
         field_stats['polarity'] = polarity
         field_stats['subjectivity'] = subjectivity
         field_stats['profanity'] = num_prof
         field_stats['mood'] = mood
         stats.append(field_stats)
     return stats
Example #10
    print 'Review:'
    print review
    print
    print 'Labeled Sentiment:', review_sentiment    
    print    
    final_sentiment = analyze_sentiment_pattern_lexicon(review,
                                                        threshold=0.1,
                                                        verbose=True)
    print '-'*60            
      
for review, review_sentiment in sample_data:
    print 'Review:'
    print review
    print 'Labeled Sentiment:', review_sentiment 
    print 'Mood:', mood(review)
    mod_score = modality(review)
    print 'Modality Score:', round(mod_score, 2)
    print 'Certainty:', 'Strong' if mod_score > 0.5 \
                                    else 'Medium' if mod_score > 0.35 \
                                                    else 'Low'
    print '-'*60            

pattern_predictions = [analyze_sentiment_pattern_lexicon(review, threshold=0.1)
                            for review in test_reviews]     
Example #11
    def process(self, message):
        # print pattern_en.suggest(message) -- suggestions
        if message == ">!train":
            self.train()
            return "It is nice to learn new stuff."
        if message == ">!forget":
            memory.clear()
            return "I am reborn. So much free space :) maybe you will use files to store memory and not RAM..."
        if message == ">!load_page":
            if sessionId not in memory:
                response = "Hello! My name is Chad and I am passionate about music."
                response += "We can share our experiences and maybe we can get along."
                response += "Would you mind telling me your name first?"
                expect[sessionId] = "name"
                memory[sessionId] = dict()
            else:
                response = "Welcome back!"
                search.search("new songs")
                with open('results.json') as data_file:
                    data = json.load(data_file)
                    for i in range(10):
                        if 'musicrecording' in data['items'][i]['pagemap']:
                            mr = data['items'][i]['pagemap']['musicrecording']
                            which = random.randint(0, len(mr) - 1)
                            if 'name' not in mr[which]:
                                response += " Did you know that " + mr[which][
                                    'byartist'] + " has released a new song?"
                            else:
                                response += " You can check out this cool song, " + mr[which]['name'] + ", by " + \
                                            mr[which]['byartist']
            return response

        s = nlp.get_sentences(message)

        doc = spacy_nlp(message)
        for w in doc:
            print "(", w, w.dep_, w.pos_, w.head, ")"

        aiml_sent_type = []
        aiml_responses = []
        memory_responses = []
        sentence_types = []
        emotions = []

        for sentence in s:
            sentence_type = self.instant_classifier.classify(
                dialogue_act_features(sentence))

            sentence_types.append(sentence_type)

            polarity, subjective = pattern_en.sentiment(sentence)
            sent = pattern_en.parse(sentence, lemmata=True)
            sent = pattern_en.Sentence(sent)
            modality = pattern_en.modality(sent)
            mood = pattern_en.mood(sent)

            if polarity > 0.8:
                emotions.append("SUPER HAPPY")
            elif polarity > 0.3:
                emotions.append("GOOD SURPRISE")
            elif polarity < -0.4:
                emotions.append("FEAR")
            elif polarity > 0.4:
                emotions.append("COOL")
            elif polarity < -0.1:
                emotions.append("SAD")
            elif polarity < -0.7:
                emotions.append("ANGER")
            else:
                emotions.append("NEUTER")

            print sentence_type, polarity, subjective, modality, mood

            if sentence_type not in ["whQuestion", "ynQuestion"]:
                try:
                    aiml_sent_type_res = self.kernel.respond(
                        sentence_type, sessionId)
                except:
                    aiml_sent_type_res = ""
                aiml_sent_type.append(aiml_sent_type_res)

            verbs_subj = set()
            sentence = sentence[0].upper() + sentence[1:]
            doc = spacy_nlp(sentence)
            for possible_subject in doc:
                if (possible_subject.dep == nsubj or possible_subject.dep
                        == nsubjpass) and possible_subject.head.pos == VERB:
                    verbs_subj.add((possible_subject, possible_subject.head))

            try:
                aiml_response = self.kernel.respond(sentence, sessionId)
            except:
                aiml_response = ""
            aiml_responses.append(aiml_response)

            # MEMORY MODULE
            memory_msg = ""
            if sentence_type == "Statement":
                # insert into memory
                for i in verbs_subj:
                    subjs = []
                    subjects = [i[0]]
                    for tok in i[0].children:
                        if tok.dep == conj:
                            subjects.append(tok)

                    for subj in subjects:
                        predec = ""
                        for tok in subj.children:
                            if tok.dep_ == "poss" or tok.dep == amod:
                                predec += tok.lower_
                        if len(predec) > 0:
                            subjs.append(predec + " " + subj.lower_)
                        else:
                            subjs.append(subj.lower_)

                    vb = i[1].lower_
                    if vb not in memory[sessionId]:
                        memory[sessionId][vb] = dict()
                    for subj in subjs:
                        for c in i[1].children:
                            if c.dep in [prep]:
                                memory[sessionId][vb][subj] = c.lower_ + " "
                                for c_prep in c.children:
                                    if c_prep.dep in [dobj, pobj, attr]:
                                        memory[sessionId][vb][
                                            subj] += c_prep.text
                                        memory_responses.append(
                                            self.kernel.respond(
                                                "memorate", sessionId))
                            elif c.dep in [dobj, pobj, attr]:
                                memory[sessionId][vb][subj] = c.text
                                memory_responses.append(
                                    self.kernel.respond("memorate", sessionId))
            elif sentence_type == "whQuestion":
                for i in verbs_subj:
                    subjs = []
                    subjects = [i[0]]
                    for tok in i[0].children:
                        if tok.dep == conj:
                            subjects.append(tok)

                    for subj in subjects:
                        predec = ""
                        for tok in subj.children:
                            if tok.dep_ == "poss" or tok.dep == amod:
                                predec += tok.lower_
                        if len(predec) > 0:
                            subjs.append(predec + " " + subj.lower_)
                        else:
                            subjs.append(subj.lower_)

                    max_similarity = 0
                    verb = i[1].lower_
                    for j in memory[sessionId]:
                        p_word = spacy_nlp(j)
                        similarity = i[1].similarity(p_word[0])
                        if similarity > max_similarity:
                            max_similarity = similarity
                            verb = j
                    if max_similarity > 0.5 and verb in memory[sessionId]:
                        num_subjs = len(subjs)
                        memory_msg = ""
                        for subj in subjs:
                            if subj in memory[sessionId][verb]:
                                toks = nlp.tokenize_text(subj)
                                memory_msg = ""
                                for t in toks:
                                    if t in first_person:
                                        memory_msg += pron_translate[t] + " "
                                    else:
                                        memory_msg += t + " "
                                num_subjs -= 1
                                if num_subjs > 2:
                                    memory_msg += ", "
                                elif num_subjs == 1:
                                    memory_msg += "and "
                        if len(memory_msg) > 0:
                            memory_msg += verb + " "
                            if num_subjs != len(subjs):
                                memory_msg += memory[sessionId][verb][
                                    subjs[-1]] + "."
            memory_responses.append(memory_msg)

        arr_response = []

        for i in aiml_sent_type:
            if len(i) > 0:
                arr_response.append(i)

        for i in aiml_responses:
            if len(i) > 0:
                arr_response.append(i)

        for i in memory_responses:
            if len(i) > 0:
                arr_response.append(i)

        if len(arr_response) == 0:
            data = search.search(message)
            snip = data['items'][0]['snippet']
            sents = nlp.get_sentences(snip)
            arr_response.append(sents[0])

        response = ""

        for i in emotions:
            try:
                emoi = self.kernel.respond(i, sessionId)
            except:
                emoi = None
            if emoi is not None:
                if random.randint(0, 100) < 50:
                    response += " " + emoi + "."
                    break

        for res in arr_response:
            if len(res) > 1:
                response += res + " "

        # generic response, if no response
        restoks = nlp.tokenize_text(response)
        if len(restoks) == 0:
            idx = random.randint(0, len(sentence_types) - 1)
            try:
                aiml_response = self.kernel.respond(sentence_types[idx],
                                                    sessionId)
            except:
                aiml_response = ""
            response += aiml_response

        # polarity, subjective = pattern_en.sentiment(response)
        # sent = pattern_en.parse(sentence, lemmata=True)
        # sent = pattern_en.Sentence(sent)
        # modality = pattern_en.modality(sent)
        # mood = pattern_en.mood(sent)
        # sentence_type = self.instant_classifier.classify(dialogue_act_features(response))
        # print response, polarity, subjective, modality, mood

        return response
Example #12
def extract_bias_features(text):
    features = {}
    txt_lwr = str(text).lower()
    words = nltk.word_tokenize(txt_lwr)
    words = [w for w in words if len(w) > 0 and w not in '.?!,;:\'s"$']
    if len(words) < 1:
        return None
    unigrams = sorted(list(set(words)))
    bigram_tokens = nltk.bigrams(words)
    bigrams = [" ".join([w1, w2]) for w1, w2 in sorted(set(bigram_tokens))]
    trigram_tokens = nltk.trigrams(words)
    trigrams = [" ".join([w1, w2, w3]) for w1, w2, w3 in sorted(set(trigram_tokens))]
    # print words
    # print unigrams
    # print bigrams
    # print trigrams
    # print "----------------------"

    # word count
    features['word_count'] = float(len(words))

    # unique word count
    features['unique_word_count'] = float(len(unigrams))

    # coherence marker count
    count, instances = count_feature_list_freq(coherence, words, bigrams, trigrams)
    # if count > 0:
    features['coherence_marker_count'] = count
    features['coherence_marker_prop'] = round(float(count) / float(len(words)), 4)
    features['coherence_marker_list'] = instances

    # degree modifier count
    count, instances = count_feature_list_freq(modifiers, words, bigrams, trigrams)
    #if count > 0:
    features['degree_modifier_count'] = count
    features['degree_modifier_prop'] = round(float(count) / float(len(words)), 4)
    features['degree_modifier_list'] = instances

    # hedge word count
    count, instances = count_feature_list_freq(hedges, words, bigrams, trigrams)
    #if count > 0:
    features['hedge_word_count'] = count
    features['hedge_word_prop'] = round(float(count) / float(len(words)), 4)
    features['hedge_word_list'] = instances

    # factive verb count
    count, instances = count_feature_list_freq(factives, words, bigrams, trigrams)
    #if count > 0:
    features['factive_verb_count'] = count
    features['factive_verb_prop'] = round(float(count) / float(len(words)), 4)
    features['factive_verb_list'] = instances

    # assertive verb count
    count, instances = count_feature_list_freq(assertives, words, bigrams, trigrams)
    #if count > 0:
    features['assertive_verb_count'] = count
    features['assertive_verb_prop'] = round(float(count) / float(len(words)), 4)
    features['assertive_verb_list'] = instances

    # implicative verb count
    count, instances = count_feature_list_freq(implicatives, words, bigrams, trigrams)
    #if count > 0:
    features['implicative_verb_count'] = count
    features['implicative_verb_prop'] = round(float(count) / float(len(words)), 4)
    features['implicative_verb_list'] = instances

    # bias words and phrases count
    count, instances = count_feature_list_freq(biased, words, bigrams, trigrams)
    #if count > 0:
    features['bias_count'] = count
    features['bias_prop'] = round(float(count) / float(len(words)), 4)
    features['bias_list'] = instances

    # opinion word count
    count, instances = count_feature_list_freq(opinionLaden, words, bigrams, trigrams)
    #if count > 0:
    features['opinion_count'] = count
    features['opinion_prop'] = round(float(count) / float(len(words)), 4)
    features['opinion_list'] = instances

    # weak subjective word count
    count, instances = count_feature_list_freq(subj_weak, words, bigrams, trigrams)
    #if count > 0:
    features['subjective_weak_count'] = count
    features['subjective_weak_prop'] = round(float(count) / float(len(words)), 4)
    features['subjective_weak_list'] = instances

    # strong subjective word count
    count, instances = count_feature_list_freq(subj_strong, words, bigrams, trigrams)
    #if count > 0:
    features['subjective_strong_count'] = count
    features['subjective_strong_prop'] = round(float(count) / float(len(words)), 4)
    features['subjective_strong_list'] = instances

    # composite sentiment score using VADER sentiment analysis package
    compound_sentiment = vader_sentiment_analysis.polarity_scores(text)['compound']
    features['vader_composite_sentiment'] = float(compound_sentiment)

    # subjectivity score using Pattern.en
    pattern_subjectivity = pattern_sentiment(text)[1]
    features['subjectivity_score'] = round(pattern_subjectivity, 4)

    # modality (certainty) score and mood using  http://www.clips.ua.ac.be/pages/pattern-en#modality
    sentence = parse(text, lemmata=True)
    sentenceObj = Sentence(sentence)
    features['modality'] = round(modality(sentenceObj), 4)
    try:
        features['mood'] = mood(sentenceObj)
    except IndexError as e:
        print "IndexError: %s" % e
        print "Ignoring..."
        features['mood'] = 'err'

    # Flesch-Kincaid Grade Level (reading difficulty) using textstat
    try:
        features['flesch-kincaid_grade_level'] = float(textstat.flesch_kincaid_grade(text))
    except TypeError as e:
        print "TypeError: %s" % e
        print "Ignoring..."
        features['flesch-kincaid_grade_level'] = 0.0

    # liwc 3rd person pronoun count (combines S/he and They)
    count, instances = count_liwc_list_freq(liwc_3pp, words)
    #if count > 0:
    features['liwc_3rd_person_pronoum_count'] = count
    features['liwc_3rd_person_pronoun_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_3rd_person_pronoun_list'] = instances

    # liwc auxiliary verb count
    count, instances = count_liwc_list_freq(liwc_aux, words)
    #if count > 0:
    features['liwc_auxiliary_verb_count'] = count
    features['liwc_auxiliary_verb_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_auxiliary_verb_list'] = instances

    # liwc adverb count
    count, instances = count_liwc_list_freq(liwc_adv, words)
    #if count > 0:
    features['liwc_adverb_count'] = count
    features['liwc_adverb_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_adverb_list'] = instances

    # liwc preposition count
    count, instances = count_liwc_list_freq(liwc_prep, words)
    #if count > 0:
    features['liwc_preposition_count'] = count
    features['liwc_preposition_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_preposition_list'] = instances

    # liwc conjunction count
    count, instances = count_liwc_list_freq(liwc_conj, words)
    #if count > 0:
    features['liwc_conjunction_count'] = count
    features['liwc_conjunction_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_conjunction_list'] = instances

    # liwc discrepency word count
    count, instances = count_liwc_list_freq(liwc_discr, words)
    #if count > 0:
    features['liwc_discrepency_word_count'] = count
    features['liwc_discrepency_word_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_discrepency_word_list'] = instances

    # liwc tentative word count
    count, instances = count_liwc_list_freq(liwc_tent, words)
    #if count > 0:
    features['liwc_tentative_word_count'] = count
    features['liwc_tentative_word_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_tentative_word_list'] = instances

    # liwc certainty word count
    count, instances = count_liwc_list_freq(liwc_cert, words)
    #if count > 0:
    features['liwc_certainty_word_count'] = count
    features['liwc_certainty_word_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_certainty_word_list'] = instances

    # liwc causation word count
    count, instances = count_liwc_list_freq(liwc_causn, words)
    #if count > 0:
    features['liwc_causation_word_count'] = count
    features['liwc_causation_word_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_causation_word_list'] = instances

    # liwc work word count
    count, instances = count_liwc_list_freq(liwc_work, words)
    #if count > 0:
    features['liwc_work_word_count'] = count
    features['liwc_work_word_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_work_word_list'] = instances

    # liwc achievement word count
    count, instances = count_liwc_list_freq(liwc_achiev, words)
    #if count > 0:
    features['liwc_achievement_word_count'] = count
    features['liwc_achievement_word_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_achievement_word_list'] = instances

    return features
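# A hypothetical usage sketch; the lexicon lists (coherence, hedges, biased, ...)
# and helper functions this function relies on are assumed to be defined elsewhere
# in the module.
feats = extract_bias_features("We believe this could arguably be the best known result.")
print(feats['hedge_word_count'], feats['modality'], feats['mood'])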
Example #13
               type=None,
               role=None,
               relation=None)
print pnp.string  # String of words (Unicode).
print pnp.chunks  # List of Chunk objects.
# print pnp.preposition            # First PP chunk in the PNP.
# sentiment
print sentiment(
    "The movie attempts to be surreal by incorporating various time paradoxes,"
    "but it's presented in such a ridiculous way it's seriously boring.")
print sentiment('Wonderfully awful! :-)').assessments
# mode and modality
s = "Some amino acids tend to be acidic while others may be basic."  # weaseling
s = parse(s, lemmata=True)
s = Sentence(s)
print modality(s)
# wordnet
s = wordnet.synsets('bird')[0]
print 'Definition:', s.gloss  # Definition string.
print '  Synonyms:', s.synonyms  # List of word forms (i.e., synonyms)
print ' Hypernyms:', s.hypernyms(
)  # returns a list of  parent synsets (i.e., more general). Synset (semantic parent).
print ' Hypernyms:', s.hypernyms(recursive=False, depth=None)
print '  Hyponyms:', s.hyponyms(
)  # returns a list child synsets (i.e., more specific).
print '  Hyponyms:', s.hyponyms(recursive=False, depth=None)
print '  Holonyms:', s.holonyms(
)  # List of synsets (of which this is a member).
print '  Meronyms:', s.meronyms()  # List of synsets (members/parts).
print '       POS:', s.pos  # Part-of-speech: NOUN | VERB | ADJECTIVE | ADVERB.
print '  Category:', s.lexname  # Category string, or None.
Example #14
    from docs import TEST_DOCUMENTS

    for doc in TEST_DOCUMENTS:
        sentences = doc['sentences']
        conditionals = 0
        indicatives = 0
        imperatives = 0
        subjunctives = 0
        minModality = 1
        maxModality = -1

        for sentence in sentences:
            s = parse(sentence, lemmata=True)
            s = Sentence(s)
            m = mood(s)
            modal = modality(s)
            #set the max or min value
            if modal > maxModality:
                maxModality = modal
            if modal < minModality:
                minModality = modal
            # this counts moods
            if m == "conditional":
                conditionals = conditionals + 1
            elif m == "indicative":
                indicatives = indicatives + 1
            elif m == "imperative":
                imperatives = imperatives + 1
            elif m == "subjunctive":
                subjunctives = subjunctives + 1
        writer.writerow({
Example #15
# Explanation:
#
# - 0.75 is the polarity (sentiment) score of the sentence, which means it is highly positive.
# - 0.8 is the subjectivity score, which indicates that the sentence largely expresses the user's personal opinion.
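# For context, a minimal sketch (not from the original notebook) of the
# sentiment() call such scores come from:
from pattern.en import sentiment
print(sentiment("I really enjoyed this wonderful movie"))
# prints a (polarity, subjectivity) tuple; polarity lies in [-1.0, +1.0], subjectivity in [0.0, 1.0]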

# ### Checking if a Statement is a Fact

from pattern.en import parse, Sentence
from pattern.en import modality

text = "Paris is the capital of France"
sent = parse(text, lemmata=True)
sent = Sentence(sent)

print(modality(sent))

text = "I think we can complete this task"
sent = parse(text, lemmata=True)
sent = Sentence(sent)

print(modality(sent))

# ### Spelling Corrections

from pattern.en import suggest

print(suggest("Whitle"))

from pattern.en import suggest
print(suggest("Fracture"))
Example #16
def extract_bias_features(text):
    features = {}
    text = unicode(text, errors='ignore')
    txt_lwr = str(text).lower()
    words = nltk.word_tokenize(txt_lwr)
    words = [w for w in words if len(w) > 0 and w not in '.?!,;:\'s"$']
    unigrams = sorted(list(set(words)))
    bigram_tokens = nltk.bigrams(words)
    bigrams = [" ".join([w1, w2]) for w1, w2 in sorted(set(bigram_tokens))]
    trigram_tokens = nltk.trigrams(words)
    trigrams = [
        " ".join([w1, w2, w3]) for w1, w2, w3 in sorted(set(trigram_tokens))
    ]
    # print words
    # print unigrams
    # print bigrams
    # print trigrams
    # print "----------------------"

    # word count
    features['word_cnt'] = len(words)

    # unique word count
    features['unique_word_cnt'] = len(unigrams)

    # coherence marker count
    count = count_feature_list_freq(coherence, words, bigrams, trigrams)
    features['cm_cnt'] = count
    features['cm_rto'] = round(float(count) / float(len(words)), 4)

    # degree modifier count
    count = count_feature_list_freq(modifiers, words, bigrams, trigrams)
    features['dm_cnt'] = count
    features['dm_rto'] = round(float(count) / float(len(words)), 4)

    # hedge word count
    count = count_feature_list_freq(hedges, words, bigrams, trigrams)
    features['hedge_cnt'] = count
    features['hedge_rto'] = round(float(count) / float(len(words)), 4)

    # factive verb count
    count = count_feature_list_freq(factives, words, bigrams, trigrams)
    features['factive_cnt'] = count
    features['factive_rto'] = round(float(count) / float(len(words)), 4)

    # assertive verb count
    count = count_feature_list_freq(assertives, words, bigrams, trigrams)
    features['assertive_cnt'] = count
    features['assertive_rto'] = round(float(count) / float(len(words)), 4)

    # implicative verb count
    count = count_feature_list_freq(implicatives, words, bigrams, trigrams)
    features['implicative_cnt'] = count
    features['implicative_rto'] = round(float(count) / float(len(words)), 4)

    # bias words and phrases count
    count = count_feature_list_freq(biased, words, bigrams, trigrams)
    features['bias_cnt'] = count
    features['bias_rto'] = round(float(count) / float(len(words)), 4)

    # opinion word count
    count = count_feature_list_freq(opinionLaden, words, bigrams, trigrams)
    features['opinion_cnt'] = count
    features['opinion_rto'] = round(float(count) / float(len(words)), 4)

    # weak subjective word count
    count = count_feature_list_freq(subj_weak, words, bigrams, trigrams)
    features['subj_weak_cnt'] = count
    features['subj_weak_rto'] = round(float(count) / float(len(words)), 4)

    # strong subjective word count
    count = count_feature_list_freq(subj_strong, words, bigrams, trigrams)
    features['subj_strong_cnt'] = count
    features['subj_strong_rto'] = round(float(count) / float(len(words)), 4)

    # composite sentiment score using VADER sentiment analysis package
    compound_sentiment = vader_sentiment_analysis.polarity_scores(
        text)['compound']
    features['vader_sentiment'] = compound_sentiment

    # subjectivity score using Pattern.en
    pattern_subjectivity = pattern_sentiment(text)[1]
    features['subjectivity'] = round(pattern_subjectivity, 4)

    # modality (certainty) score and mood using  http://www.clips.ua.ac.be/pages/pattern-en#modality
    sentence = parse(text, lemmata=True)
    sentenceObj = Sentence(sentence)
    features['modality'] = round(modality(sentenceObj), 4)
    features['mood'] = mood(sentenceObj)

    # Flesch-Kincaid Grade Level (reading difficulty) using textstat
    features['fk_gl'] = textstat.flesch_kincaid_grade(text)

    # liwc 3rd person pronoun count (combines S/he and They)
    count = count_liwc_list_freq(liwc_3pp, words)
    features['liwc_3pp_cnt'] = count
    features['liwc_3pp_rto'] = round(float(count) / float(len(words)), 4)

    # liwc auxiliary verb count
    count = count_liwc_list_freq(liwc_aux, words)
    features['liwc_aux_cnt'] = count
    features['liwc_aux_rto'] = round(float(count) / float(len(words)), 4)

    # liwc adverb count
    count = count_liwc_list_freq(liwc_adv, words)
    features['liwc_adv_cnt'] = count
    features['liwc_adv_rto'] = round(float(count) / float(len(words)), 4)

    # liwc preposition count
    count = count_liwc_list_freq(liwc_prep, words)
    features['liwc_prep_cnt'] = count
    features['liwc_prep_rto'] = round(float(count) / float(len(words)), 4)

    # liwc conjunction count
    count = count_liwc_list_freq(liwc_conj, words)
    features['liwc_conj_cnt'] = count
    features['liwc_conj_rto'] = round(float(count) / float(len(words)), 4)

    # liwc discrepency word count
    count = count_liwc_list_freq(liwc_discr, words)
    features['liwc_discr_cnt'] = count
    features['liwc_discr_rto'] = round(float(count) / float(len(words)), 4)

    # liwc tentative word count
    count = count_liwc_list_freq(liwc_tent, words)
    features['liwc_tent_cnt'] = count
    features['liwc_tent_rto'] = round(float(count) / float(len(words)), 4)

    # liwc certainty word count
    count = count_liwc_list_freq(liwc_cert, words)
    features['liwc_cert_cnt'] = count
    features['liwc_cert_rto'] = round(float(count) / float(len(words)), 4)

    # liwc causation word count
    count = count_liwc_list_freq(liwc_causn, words)
    features['liwc_causn_cnt'] = count
    features['liwc_causn_rto'] = round(float(count) / float(len(words)), 4)

    # liwc work word count
    count = count_liwc_list_freq(liwc_work, words)
    features['liwc_work_cnt'] = count
    features['liwc_work_rto'] = round(float(count) / float(len(words)), 4)

    # liwc achievement word count
    count = count_liwc_list_freq(liwc_achiev, words)
    features['liwc_achiev_cnt'] = count
    features['liwc_achiev_rto'] = round(float(count) / float(len(words)), 4)

    return features
Example #17
def modality_score(sentence):
    s = parse(sentence, lemmata=True)
    s = Sentence(s)

    return modality(s)
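# A hypothetical usage sketch of the helper above:
print(modality_score("It might rain tomorrow."))   # hedged wording yields a lower score
print(modality_score("It will definitely rain."))  # confident wording yields a higher score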
Example #18
print PAST in tenses('purred') # 'p' in tenses() also works.
print (PAST, 1, PL) in tenses('purred') 

print 'Quantification'

print quantify(['goose', 'goose', 'duck', 'chicken', 'chicken', 'chicken'])
print quantify('carrot', amount=90)
print quantify({'carrot': 100, 'parrot': 20})

print 'ngrams'
print ngrams("I am eating a pizza.", n=2)


#parse
s = parse('I eat pizza with a fork.')
pprint(s)

#tag
for word, t in tag('The cat felt happy.'):
    print word +' is ' +t     
    
s = "The movie attempts to be surreal by incorporating various time paradoxes, but it's presented in such a ridiculous way it's seriously boring."    
print sentiment(s)     
print polarity(s)
print subjectivity(s)

#The modality() function returns a value between -1.0 and +1.0, expressing the degree of certainty
s2 = "Some amino acids tend to be acidic while others may be basic." # weaseling
se = Sentence(parse(s2, chunks=False, lemmata=True))
print modality(se)
Example #19
                        dicWord[nn]=1
            else:
                for w in chunk:
                    if w.type=="JJ":
                       index=c.lower().find(w.string)
                       
                       print index
                      
                       print w
                       if index>0 and judge(c,index):
                            c=c[:index+len(w.string)]+'</span>'+c[index+len(w.string):]
                            c=c[:index]+'<span class=*JJ* >'+c[index:]
                       
                       #print c

        c='<span class=*sentence* sentiment=*'+str(sentiment(sentence))+'* positive=*'+str(positive(sentence))+'* mood=*'+str(mood(sentence))+'* modality=*'+str(modality(sentence))+'*>'+c+"</span>"
        c=c.replace('"','*')
        v.texts=v.texts+c
        #print c
        #pdb.set_trace()
        #print v.texts            
        
    print v.date
    #print v.nouns
    #print v.texts
    print v.stars
    
    cur.execute('insert into wZwZcte4lcbu51NOzCjWbQ values("'+v.date+'","'+v.user+'","'+v.nouns+'","'+str(v.stars)+'" ,"'+v.texts+'")')
#cur.execute('create table wordfre(word varchar(20) UNIQUE,uid integer)')
cur.close()    
cx.commit()
Example #20
print conjugate('purred', '3sg')
print PAST in tenses('purred')  # 'p' in tenses() also works.
print(PAST, 1, PL) in tenses('purred')

print 'Quantification'

print quantify(['goose', 'goose', 'duck', 'chicken', 'chicken', 'chicken'])
print quantify('carrot', amount=90)
print quantify({'carrot': 100, 'parrot': 20})

print 'ngrams'
print ngrams("I am eating a pizza.", n=2)

#parse
s = parse('I eat pizza with a fork.')
pprint(s)

#tag
for word, t in tag('The cat felt happy.'):
    print word + ' is ' + t

s = "The movie attempts to be surreal by incorporating various time paradoxes, but it's presented in such a ridiculous way it's seriously boring."
print sentiment(s)
print polarity(s)
print subjectivity(s)

#The modality() function returns a value between -1.0 and +1.0, expressing the degree of certainty
s2 = "Some amino acids tend to be acidic while others may be basic."  # weaseling
se = Sentence(parse(s2, chunks=False, lemmata=True))
print modality(se)
Example #21
def checkModality(sentence):
    return modality(sentence)
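# A hypothetical usage sketch; checkModality() just forwards to modality(), so
# (assuming pattern.en as in the other examples) it can be given a parsed Sentence:
from pattern.en import parse, Sentence
print(checkModality(Sentence(parse("We may need more data.", lemmata=True))))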
Example #22
    def get_score(self, content: str):
        self.sent = parse(content, lemmata=True)
        self.sent = Sentence(self.sent)
        self.modality = modality(self.sent)

        return self.modality
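# The same computation as a standalone sketch, assuming pattern.en provides
# parse, Sentence and modality as in the other examples:
from pattern.en import parse, Sentence, modality
sent = Sentence(parse("We could probably improve this model.", lemmata=True))
print(modality(sent))  # float in [-1.0, +1.0]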