def sentiment_analysis(message):
    actual_range = 2
    final = []
    message = re.sub("(@[A-Za-z0-9]+)|( RT)|( rt)|(\w+:\/\/\S+)", " ", message).strip()  # filter usernames, urls
    message = re.sub('#', "", message)
    message = filter(lambda x: x in string.printable, message)  # filter non printable characters
    message = HTMLParser.HTMLParser().unescape(message)  # unescape html
    tokenized = tokenize(message, puctuation='.!?:')
    tokenized = filter(bool, tokenized)
    tok1 = []
    for index, it in enumerate(tokenized):
        mod = mood(it)
        if '?' in it or mod == 'conditional':
            continue
        tok1.append(it.strip())
    score = 0.0
    possed = [re.split(' ', sentence) for sentence in tok1]
    possed = [nltk.pos_tag(sentence) for sentence in possed]
    final = []
    for sentence in possed:
        check = []
        for entry in sentence:
            check.append(list(entry))
        final.append(check)
    range_count = 0
    for sentence in final:
        sentence = dictionary_tag(sentence)
        score = score + sentiment_score(sentence)
    return score
def test_mood(self):
    # Assert imperative mood.
    v = en.mood(en.Sentence(en.parse("Do your homework!")))
    self.assertEqual(v, en.IMPERATIVE)
    # Assert conditional mood.
    v = en.mood(en.Sentence(en.parse("We ought to help him.")))
    self.assertEqual(v, en.CONDITIONAL)
    # Assert subjunctive mood.
    v = en.mood(en.Sentence(en.parse("I wouldn't do that if I were you.")))
    self.assertEqual(v, en.SUBJUNCTIVE)
    # Assert indicative mood.
    v = en.mood(en.Sentence(en.parse("The weather is nice today.")))
    self.assertEqual(v, en.INDICATIVE)
    print "pattern.en.mood()"
def test_mood(self):
    # Assert imperative mood.
    v = en.mood(en.Sentence(en.parse("Do your homework!")))
    self.assertEqual(v, en.IMPERATIVE)
    # Assert conditional mood.
    v = en.mood(en.Sentence(en.parse("We ought to help him.")))
    self.assertEqual(v, en.CONDITIONAL)
    # Assert subjunctive mood.
    v = en.mood(en.Sentence(en.parse("I wouldn't do that if I were you.")))
    self.assertEqual(v, en.SUBJUNCTIVE)
    # Assert indicative mood.
    v = en.mood(en.Sentence(en.parse("The weather is nice today.")))
    self.assertEqual(v, en.INDICATIVE)
    print("pattern.en.mood()")
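In pattern.en the mood constants are plain lowercase strings, which is why several of the later snippets compare mood() output directly against string literals. A minimal check, as a sketch assuming a standard pattern install:

from pattern import en

# Sketch: the mood constants are lowercase strings, so string comparison
# (as used in later snippets) is equivalent to comparing against the constants.
assert en.INDICATIVE == "indicative"
assert en.IMPERATIVE == "imperative"
assert en.CONDITIONAL == "conditional"
assert en.SUBJUNCTIVE == "subjunctive"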
def add_modality(tdb):
    for tweet in tdb:
        s = parse(tweet[2], lemmata=True)
        s = Sentence(s)
        (form, score) = (mood(s), modality(s))
        tweet.extend((form, score))
    return tdb
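add_modality() expects an iterable of mutable rows with the tweet text at index 2 and appends the mood label and modality score to each row. A minimal hedged usage sketch; the [id, user, text] row layout is an assumption made for illustration, not part of the original:

from pattern.en import parse, Sentence, mood, modality

# Hypothetical rows: [id, user, text]; only the text at index 2 is required by add_modality().
tdb = [
    [1, "alice", "We ought to help him."],
    [2, "bob", "The weather is nice today."],
]
for row in add_modality(tdb):
    # Each row now ends with the mood label and the modality score.
    print("%s -> %s %.2f" % (row[2], row[3], row[4]))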
def extractMood(characterSentences):
    """
    Analyzes the sentence using grammatical mood module from pattern.
    """
    characterMoods = defaultdict(list)
    for key, value in characterSentences.iteritems():
        for x in value:
            characterMoods[key].append(mood(Sentence(parse(str(x), lemmata=True))))
    return characterMoods
def get_mood(sentence):
    """Returns mood of sentence string"""
    conditional_words = [
        "assuming", "if", "in case", "no matter how", "supposing", "unless",
        "would", "'d", "should", "could", "might", "going to", "whenever",
        "as long as", "because", "in order to"
    ]
    result = mood(sentence)
    if result == 'imperative':
        return 'nonconditional'
    if result in ['subjunctive', 'conditional']:
        for cw in conditional_words:
            if cw in sentence.lower():
                return 'conditional'
        return 'subjunctive'
    return 'nonconditional'  # indicative
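get_mood() collapses pattern's four moods into a coarser conditional / subjunctive / nonconditional label by re-checking the sentence against a list of conditional cue words. A short hedged usage sketch; exact labels depend on the installed pattern.en version:

# Hedged usage sketch for get_mood(); outputs depend on the pattern.en version installed.
for s in ("Close the door.",
          "If it rains, we will stay inside.",
          "I wish it were summer."):
    print("%s -> %s" % (s, get_mood(s)))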
def team_sentiment_analysis(stats):
    for s in stats.sentences:
        this_sentiment = sentiment(s)
        polarity = float("{0:.2f}".format(this_sentiment[0]))
        subjectivity = float("{0:.2f}".format(this_sentiment[1]))
        polarity_10 = float("{0:.1f}".format(this_sentiment[0]))
        subjectivity_10 = float("{0:.1f}".format(this_sentiment[1]))
        stats.polarity_counts[polarity] += 1
        stats.subjectivity_counts[subjectivity] += 1
        stats.polarity_counts_10s[polarity_10] += 1
        stats.subjectivity_counts_10s[subjectivity_10] += 1
        s = Sentence(parse(s, lemmata=True))
        stats.mood_counts[mood(s)] += 1
        rounded_modality = float("{0:.2f}".format(modality(s)))
        rounded_modality_10 = float("{0:.1f}".format(modality(s)))
        stats.modality_counts[rounded_modality] += 1
        stats.modality_counts_10s[rounded_modality_10] += 1
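team_sentiment_analysis() only reads stats.sentences and increments counter mappings on the stats object. A minimal sketch of a compatible container using defaultdict counters; this class is an assumption for illustration, not the project's actual implementation:

from collections import defaultdict

class TeamStats(object):
    """Hypothetical container exposing the attributes team_sentiment_analysis() touches."""
    def __init__(self, sentences):
        self.sentences = sentences
        self.polarity_counts = defaultdict(int)
        self.subjectivity_counts = defaultdict(int)
        self.polarity_counts_10s = defaultdict(int)
        self.subjectivity_counts_10s = defaultdict(int)
        self.mood_counts = defaultdict(int)
        self.modality_counts = defaultdict(int)
        self.modality_counts_10s = defaultdict(int)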
for review, review_sentiment in sample_data:
    print 'Review:'
    print review
    print
    print 'Labeled Sentiment:', review_sentiment
    print
    final_sentiment = analyze_sentiment_pattern_lexicon(review, threshold=0.1, verbose=True)
    print '-' * 60

for review, review_sentiment in sample_data:
    print 'Review:'
    print review
    print 'Labeled Sentiment:', review_sentiment
    print 'Mood:', mood(review)
    mod_score = modality(review)
    print 'Modality Score:', round(mod_score, 2)
    print 'Certainty:', 'Strong' if mod_score > 0.5 \
        else 'Medium' if mod_score > 0.35 \
        else 'Low'
    print '-' * 60

pattern_predictions = [analyze_sentiment_pattern_lexicon(review, threshold=0.1)
from docs import TEST_DOCUMENTS

for doc in TEST_DOCUMENTS:
    sentences = doc['sentences']
    conditionals = 0
    indicatives = 0
    imperatives = 0
    subjunctives = 0
    minModality = 1
    maxModality = -1
    for sentence in sentences:
        s = parse(sentence, lemmata=True)
        s = Sentence(s)
        m = mood(s)
        modal = modality(s)
        # Track the max and min modality values.
        if modal > maxModality:
            maxModality = modal
        if modal < minModality:
            minModality = modal
        # Count moods (compare strings with == rather than `is`).
        if m == "conditional":
            conditionals = conditionals + 1
        elif m == "indicative":
            indicatives = indicatives + 1
        elif m == "imperative":
            imperatives = imperatives + 1
        elif m == "subjunctive":
            subjunctives = subjunctives + 1
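The per-document tallies above can also be collected with collections.Counter instead of four named integers. A hedged alternative sketch over the same sentences list, using the same pattern.en calls:

from collections import Counter
from pattern.en import parse, Sentence, mood

# Alternative tallying sketch: Counter keys are the mood labels returned by pattern.en.
mood_counts = Counter(
    mood(Sentence(parse(sentence, lemmata=True))) for sentence in sentences
)
conditionals = mood_counts["conditional"]
indicatives = mood_counts["indicative"]
imperatives = mood_counts["imperative"]
subjunctives = mood_counts["subjunctive"]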
def extract_bias_features(text):
    features = {}
    text = unicode(text, errors='ignore')
    txt_lwr = str(text).lower()
    words = nltk.word_tokenize(txt_lwr)
    words = [w for w in words if len(w) > 0 and w not in '.?!,;:\'s"$']
    unigrams = sorted(list(set(words)))
    bigram_tokens = nltk.bigrams(words)
    bigrams = [" ".join([w1, w2]) for w1, w2 in sorted(set(bigram_tokens))]
    trigram_tokens = nltk.trigrams(words)
    trigrams = [" ".join([w1, w2, w3]) for w1, w2, w3 in sorted(set(trigram_tokens))]
    # print words
    # print unigrams
    # print bigrams
    # print trigrams
    # print "----------------------"

    # word count
    features['word_cnt'] = len(words)
    # unique word count
    features['unique_word_cnt'] = len(unigrams)
    # coherence marker count
    count = count_feature_list_freq(coherence, words, bigrams, trigrams)
    features['cm_cnt'] = count
    features['cm_rto'] = round(float(count) / float(len(words)), 4)
    # degree modifier count
    count = count_feature_list_freq(modifiers, words, bigrams, trigrams)
    features['dm_cnt'] = count
    features['dm_rto'] = round(float(count) / float(len(words)), 4)
    # hedge word count
    count = count_feature_list_freq(hedges, words, bigrams, trigrams)
    features['hedge_cnt'] = count
    features['hedge_rto'] = round(float(count) / float(len(words)), 4)
    # factive verb count
    count = count_feature_list_freq(factives, words, bigrams, trigrams)
    features['factive_cnt'] = count
    features['factive_rto'] = round(float(count) / float(len(words)), 4)
    # assertive verb count
    count = count_feature_list_freq(assertives, words, bigrams, trigrams)
    features['assertive_cnt'] = count
    features['assertive_rto'] = round(float(count) / float(len(words)), 4)
    # implicative verb count
    count = count_feature_list_freq(implicatives, words, bigrams, trigrams)
    features['implicative_cnt'] = count
    features['implicative_rto'] = round(float(count) / float(len(words)), 4)
    # bias words and phrases count
    count = count_feature_list_freq(biased, words, bigrams, trigrams)
    features['bias_cnt'] = count
    features['bias_rto'] = round(float(count) / float(len(words)), 4)
    # opinion word count
    count = count_feature_list_freq(opinionLaden, words, bigrams, trigrams)
    features['opinion_cnt'] = count
    features['opinion_rto'] = round(float(count) / float(len(words)), 4)
    # weak subjective word count
    count = count_feature_list_freq(subj_weak, words, bigrams, trigrams)
    features['subj_weak_cnt'] = count
    features['subj_weak_rto'] = round(float(count) / float(len(words)), 4)
    # strong subjective word count
    count = count_feature_list_freq(subj_strong, words, bigrams, trigrams)
    features['subj_strong_cnt'] = count
    features['subj_strong_rto'] = round(float(count) / float(len(words)), 4)
    # composite sentiment score using VADER sentiment analysis package
    compound_sentiment = vader_sentiment_analysis.polarity_scores(text)['compound']
    features['vader_sentiment'] = compound_sentiment
    # subjectivity score using Pattern.en
    pattern_subjectivity = pattern_sentiment(text)[1]
    features['subjectivity'] = round(pattern_subjectivity, 4)
    # modality (certainty) score and mood using http://www.clips.ua.ac.be/pages/pattern-en#modality
    sentence = parse(text, lemmata=True)
    sentenceObj = Sentence(sentence)
    features['modality'] = round(modality(sentenceObj), 4)
    features['mood'] = mood(sentenceObj)
    # Flesch-Kincaid Grade Level (reading difficulty) using textstat
    features['fk_gl'] = textstat.flesch_kincaid_grade(text)
    # liwc 3rd person pronoun count (combines S/he and They)
    count = count_liwc_list_freq(liwc_3pp, words)
    features['liwc_3pp_cnt'] = count
    features['liwc_3pp_rto'] = round(float(count) / float(len(words)), 4)
    # liwc auxiliary verb count
    count = count_liwc_list_freq(liwc_aux, words)
    features['liwc_aux_cnt'] = count
    features['liwc_aux_rto'] = round(float(count) / float(len(words)), 4)
    # liwc adverb count
    count = count_liwc_list_freq(liwc_adv, words)
    features['liwc_adv_cnt'] = count
    features['liwc_adv_rto'] = round(float(count) / float(len(words)), 4)
    # liwc preposition count
    count = count_liwc_list_freq(liwc_prep, words)
    features['liwc_prep_cnt'] = count
    features['liwc_prep_rto'] = round(float(count) / float(len(words)), 4)
    # liwc conjunction count
    count = count_liwc_list_freq(liwc_conj, words)
    features['liwc_conj_cnt'] = count
    features['liwc_conj_rto'] = round(float(count) / float(len(words)), 4)
    # liwc discrepancy word count
    count = count_liwc_list_freq(liwc_discr, words)
    features['liwc_discr_cnt'] = count
    features['liwc_discr_rto'] = round(float(count) / float(len(words)), 4)
    # liwc tentative word count
    count = count_liwc_list_freq(liwc_tent, words)
    features['liwc_tent_cnt'] = count
    features['liwc_tent_rto'] = round(float(count) / float(len(words)), 4)
    # liwc certainty word count
    count = count_liwc_list_freq(liwc_cert, words)
    features['liwc_cert_cnt'] = count
    features['liwc_cert_rto'] = round(float(count) / float(len(words)), 4)
    # liwc causation word count
    count = count_liwc_list_freq(liwc_causn, words)
    features['liwc_causn_cnt'] = count
    features['liwc_causn_rto'] = round(float(count) / float(len(words)), 4)
    # liwc work word count
    count = count_liwc_list_freq(liwc_work, words)
    features['liwc_work_cnt'] = count
    features['liwc_work_rto'] = round(float(count) / float(len(words)), 4)
    # liwc achievement word count
    count = count_liwc_list_freq(liwc_achiev, words)
    features['liwc_achiev_cnt'] = count
    features['liwc_achiev_rto'] = round(float(count) / float(len(words)), 4)
    return features
def printReview(sentence, tagger, nlp):
    POStags = ['NN', 'RB', 'VB', 'JJ', 'MD', 'PR']
    terms = nltk.word_tokenize(sentence.lower())
    # print(tagger.tag(terms))
    POSterms = getPOSterms(terms, POStags, tagger)
    nouns = POSterms['NN']
    adverbs = POSterms['RB']
    verbs = POSterms['VB']
    adjectives = POSterms['JJ']
    modalAuxilary = POSterms['MD']
    pronouns = POSterms['PR']
    # print(adverbs)
    # print(adjectives)
    # print(nouns)
    if len(terms) > 3:
        fourgrams = ngrams(terms, 4)
        for tg in fourgrams:
            case1 = tg[0] in nouns and tg[1] in verbs and tg[2] in adverbs and tg[3] in adjectives
            case2 = tg[0] in nouns and tg[1] in verbs and tg[2] in adjectives
            case3 = tg[0] in adverbs and tg[1] in adjectives and tg[2] in nouns
            case4 = tg[0] in nouns and tg[1] in verbs and tg[3] in adjectives
            case5 = tg[1] in nouns and tg[2] in verbs and tg[3] in adjectives
            case6 = tg[1] in adverbs and tg[2] in adjectives and tg[3] in nouns
            case7 = tg[0] in adjectives and tg[1] in adjectives and tg[2] in adjectives and tg[3] in nouns
            case8 = tg[0] in pronouns and tg[1] in modalAuxilary and tg[2] in verbs
            # case9 = tg[1] in adjectives and tg[2] in nouns and tg[3] in nouns
            case10 = tg[0] in pronouns and tg[1] in modalAuxilary and tg[3] in verbs
            case11 = tg[1] in verbs and tg[2] in adverbs and tg[3] in adjectives
            case12 = tg[1] in adjectives and tg[2] in adjectives and tg[3] in nouns
            # case13 = tg[1] in verbs and tg[2] in pronouns and tg[3] in nouns
            # case14 = tg[0] in verbs and tg[1] in pronouns and tg[3] in nouns
            # case15 = tg[0] in pronouns and tg[1] in verbs and tg[3] in nouns
            # case16 = tg[0] in pronouns and tg[1] in verbs and tg[3] in adjectives
            # print(tg)
            if (case1 or case2 or case3 or case4 or case5 or case6 or case7
                    or case8 or case10 or case11 or case12):
                # print(tg)
                # print('case1', case1, 'case2', case2, 'case3', case3, 'case4', case4, 'case5', case5, 'case6', case6)
                # print('case7', case7, 'case8', case8, 'case10', case10, 'case11', case11, 'case12', case12)
                return sentence
            # if (case13 or case14 or case15 or case16):
            #     print(tg)
            #     print('case11', case11, 'case12', case12, 'case13', case13, 'case14', case14)
            #     return sentence
    sentence = sentence.translate(str.maketrans('', '', string.punctuation))
    # specialCase1: recommend
    # print(sentence)
    for word in sentence.lower().strip().split(sep=' '):
        if word == 'recommend':
            return sentence
    # specialCase2: must or must've
    for word in sentence.lower().strip().split(sep=' '):
        if word == 'must' or word == "must've":
            return sentence
    # specialCase3: amazing
    for word in sentence.lower().strip().split(sep=' '):
        if word == 'amazing':
            return sentence
    # specialCase4: Dont miss or Do not miss
    notContains = sentence.find('Dont miss') == -1 and sentence.find('Do not miss') == -1
    if not notContains:
        return sentence
    # specialCase5: definitely
    for word in sentence.lower().strip().split(sep=' '):
        if word == 'definitely':
            return sentence
    # specialCase6: delicious or fantastic
    for word in sentence.lower().strip().split(sep=' '):
        if word == 'delicious' or word == 'fantastic':
            return sentence
    # specialCase7: byob
    for word in sentence.lower().strip().split(sep=' '):
        if word == 'byob':
            return sentence
    # specialCase8: bland
    for word in sentence.lower().strip().split(sep=' '):
        if word == 'bland':
            return sentence
    # specialCase9: reservation
    for word in sentence.lower().strip().split(sep=' '):
        if word == 'reservation':
            return sentence
    # specialCase10: imperative
    doc = nlp(sentence)
    hasSubject = False
    for xx in doc:
        if xx.dep_ == 'nsubj':
            hasSubject = True
    if hasSubject and mood(sentence) == 'imperative':
        return sentence
    # specialCase11: subject of sentence is "you" (xx is the last token from the loop above)
    if hasSubject and xx.text == 'you':
        return sentence
    '''subchk = []
def extract_bias_features(text):
    features = {}
    txt_lwr = str(text).lower()
    words = nltk.word_tokenize(txt_lwr)
    words = [w for w in words if len(w) > 0 and w not in '.?!,;:\'s"$']
    if len(words) < 1:
        return None
    unigrams = sorted(list(set(words)))
    bigram_tokens = nltk.bigrams(words)
    bigrams = [" ".join([w1, w2]) for w1, w2 in sorted(set(bigram_tokens))]
    trigram_tokens = nltk.trigrams(words)
    trigrams = [" ".join([w1, w2, w3]) for w1, w2, w3 in sorted(set(trigram_tokens))]
    # print words
    # print unigrams
    # print bigrams
    # print trigrams
    # print "----------------------"

    # word count
    features['word_count'] = float(len(words))
    # unique word count
    features['unique_word_count'] = float(len(unigrams))
    # coherence marker count
    count, instances = count_feature_list_freq(coherence, words, bigrams, trigrams)
    # if count > 0:
    features['coherence_marker_count'] = count
    features['coherence_marker_prop'] = round(float(count) / float(len(words)), 4)
    features['coherence_marker_list'] = instances
    # degree modifier count
    count, instances = count_feature_list_freq(modifiers, words, bigrams, trigrams)
    # if count > 0:
    features['degree_modifier_count'] = count
    features['degree_modifier_prop'] = round(float(count) / float(len(words)), 4)
    features['degree_modifier_list'] = instances
    # hedge word count
    count, instances = count_feature_list_freq(hedges, words, bigrams, trigrams)
    # if count > 0:
    features['hedge_word_count'] = count
    features['hedge_word_prop'] = round(float(count) / float(len(words)), 4)
    features['hedge_word_list'] = instances
    # factive verb count
    count, instances = count_feature_list_freq(factives, words, bigrams, trigrams)
    # if count > 0:
    features['factive_verb_count'] = count
    features['factive_verb_prop'] = round(float(count) / float(len(words)), 4)
    features['factive_verb_list'] = instances
    # assertive verb count
    count, instances = count_feature_list_freq(assertives, words, bigrams, trigrams)
    # if count > 0:
    features['assertive_verb_count'] = count
    features['assertive_verb_prop'] = round(float(count) / float(len(words)), 4)
    features['assertive_verb_list'] = instances
    # implicative verb count
    count, instances = count_feature_list_freq(implicatives, words, bigrams, trigrams)
    # if count > 0:
    features['implicative_verb_count'] = count
    features['implicative_verb_prop'] = round(float(count) / float(len(words)), 4)
    features['implicative_verb_list'] = instances
    # bias words and phrases count
    count, instances = count_feature_list_freq(biased, words, bigrams, trigrams)
    # if count > 0:
    features['bias_count'] = count
    features['bias_prop'] = round(float(count) / float(len(words)), 4)
    features['bias_list'] = instances
    # opinion word count
    count, instances = count_feature_list_freq(opinionLaden, words, bigrams, trigrams)
    # if count > 0:
    features['opinion_count'] = count
    features['opinion_prop'] = round(float(count) / float(len(words)), 4)
    features['opinion_list'] = instances
    # weak subjective word count
    count, instances = count_feature_list_freq(subj_weak, words, bigrams, trigrams)
    # if count > 0:
    features['subjective_weak_count'] = count
    features['subjective_weak_prop'] = round(float(count) / float(len(words)), 4)
    features['subjective_weak_list'] = instances
    # strong subjective word count
    count, instances = count_feature_list_freq(subj_strong, words, bigrams, trigrams)
    # if count > 0:
    features['subjective_strong_count'] = count
    features['subjective_strong_prop'] = round(float(count) / float(len(words)), 4)
    features['subjective_strong_list'] = instances
    # composite sentiment score using VADER sentiment analysis package
    compound_sentiment = vader_sentiment_analysis.polarity_scores(text)['compound']
    features['vader_composite_sentiment'] = float(compound_sentiment)
    # subjectivity score using Pattern.en
    pattern_subjectivity = pattern_sentiment(text)[1]
    features['subjectivity_score'] = round(pattern_subjectivity, 4)
    # modality (certainty) score and mood using http://www.clips.ua.ac.be/pages/pattern-en#modality
    sentence = parse(text, lemmata=True)
    sentenceObj = Sentence(sentence)
    features['modality'] = round(modality(sentenceObj), 4)
    try:
        features['mood'] = mood(sentenceObj)
    except IndexError as e:
        print "IndexError: %s" % e
        print "Ignoring..."
        features['mood'] = 'err'
    # Flesch-Kincaid Grade Level (reading difficulty) using textstat
    try:
        features['flesch-kincaid_grade_level'] = float(textstat.flesch_kincaid_grade(text))
    except TypeError as e:
        print "TypeError: %s" % e
        print "Ignoring..."
        features['flesch-kincaid_grade_level'] = 0.0
    # liwc 3rd person pronoun count (combines S/he and They)
    count, instances = count_liwc_list_freq(liwc_3pp, words)
    # if count > 0:
    features['liwc_3rd_person_pronoun_count'] = count
    features['liwc_3rd_person_pronoun_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_3rd_person_pronoun_list'] = instances
    # liwc auxiliary verb count
    count, instances = count_liwc_list_freq(liwc_aux, words)
    # if count > 0:
    features['liwc_auxiliary_verb_count'] = count
    features['liwc_auxiliary_verb_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_auxiliary_verb_list'] = instances
    # liwc adverb count
    count, instances = count_liwc_list_freq(liwc_adv, words)
    # if count > 0:
    features['liwc_adverb_count'] = count
    features['liwc_adverb_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_adverb_list'] = instances
    # liwc preposition count
    count, instances = count_liwc_list_freq(liwc_prep, words)
    # if count > 0:
    features['liwc_preposition_count'] = count
    features['liwc_preposition_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_preposition_list'] = instances
    # liwc conjunction count
    count, instances = count_liwc_list_freq(liwc_conj, words)
    # if count > 0:
    features['liwc_conjunction_count'] = count
    features['liwc_conjunction_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_conjunction_list'] = instances
    # liwc discrepancy word count
    count, instances = count_liwc_list_freq(liwc_discr, words)
    # if count > 0:
    features['liwc_discrepency_word_count'] = count
    features['liwc_discrepency_word_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_discrepency_word_list'] = instances
    # liwc tentative word count
    count, instances = count_liwc_list_freq(liwc_tent, words)
    # if count > 0:
    features['liwc_tentative_word_count'] = count
    features['liwc_tentative_word_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_tentative_word_list'] = instances
    # liwc certainty word count
    count, instances = count_liwc_list_freq(liwc_cert, words)
    # if count > 0:
    features['liwc_certainty_word_count'] = count
    features['liwc_certainty_word_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_certainty_word_list'] = instances
    # liwc causation word count
    count, instances = count_liwc_list_freq(liwc_causn, words)
    # if count > 0:
    features['liwc_causation_word_count'] = count
    features['liwc_causation_word_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_causation_word_list'] = instances
    # liwc work word count
    count, instances = count_liwc_list_freq(liwc_work, words)
    # if count > 0:
    features['liwc_work_word_count'] = count
    features['liwc_work_word_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_work_word_list'] = instances
    # liwc achievement word count
    count, instances = count_liwc_list_freq(liwc_achiev, words)
    # if count > 0:
    features['liwc_achievement_word_count'] = count
    features['liwc_achievement_word_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_achievement_word_list'] = instances
    return features
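Both extract_bias_features variants assume that the lexicon lists (coherence, modifiers, hedges, factives, and so on) and the VADER analyzer are loaded at module level. A hedged usage sketch under that assumption; the sample sentence and printed keys are illustrative only:

# Hedged usage sketch; assumes the module-level lexicons and the VADER analyzer are already loaded.
feats = extract_bias_features(u"Clearly, this might be the best album they have ever released.")
if feats is not None:
    print("hedges=%s modality=%s mood=%s" % (feats['hedge_word_count'], feats['modality'], feats['mood']))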
type_of_sent = {}
type_of_sent["indicative"] = 0
type_of_sent["imperative"] = 0
type_of_sent["conditional"] = 0
type_of_sent["subjunctive"] = 0

with open(sys.argv[1]) as f:
    for line in f:
        line = line.rstrip('\n')
        line = line.decode('utf-8')
        sentences = tokenize.sent_tokenize(line)
        for sentence in sentences:
            # print sentence  # DEBUGGING
            s = parse(sentence, lemmata=True)
            s = Sentence(s)
            # print mood(s)  # DEBUGGING
            mood_type = str(mood(s))
            current = type_of_sent[mood_type]
            current = current + 1
            type_of_sent[mood_type] = current

print type_of_sent

# s = "Some amino acids tend to be acidic while others may be basic."  # weaseling
# s = parse(s, lemmata=True)
# s = Sentence(s)
# print modality(s)  # How sure a sentence is ... not using here
# print mood(s)
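The script above is Python 2 (print statements, str.decode on file lines). A hedged Python 3 sketch of the same tallying loop, assuming a Python-3-compatible pattern release and the same command-line usage:

# Python 3 sketch of the mood-tallying loop above; same input file argument is assumed.
import sys
from collections import Counter
from nltk import tokenize
from pattern.en import parse, Sentence, mood

type_of_sent = Counter()
with open(sys.argv[1], encoding="utf-8") as f:
    for line in f:
        for sentence in tokenize.sent_tokenize(line.rstrip("\n")):
            type_of_sent[mood(Sentence(parse(sentence, lemmata=True)))] += 1
print(dict(type_of_sent))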
def process(self, message):
    # print pattern_en.suggest(message) -- suggestions
    if message == ">!train":
        self.train()
        return "It is nice to learn new stuff."
    if message == ">!forget":
        memory.clear()
        return "I am reborn. So much free space :) maybe you will use files to store memory and not RAM..."
    if message == ">!load_page":
        if sessionId not in memory:
            response = "Hello! My name is Chad and I am passionate about music."
            response += "We can share our experiences and maybe we can get along."
            response += "Would you mind telling me your name first?"
            expect[sessionId] = "name"
            memory[sessionId] = dict()
        else:
            response = "Welcome back!"
            search.search("new songs")
            with open('results.json') as data_file:
                data = json.load(data_file)
            for i in range(10):
                if 'musicrecording' in data['items'][i]['pagemap']:
                    mr = data['items'][i]['pagemap']['musicrecording']
                    which = random.randint(0, len(mr) - 1)
                    if 'name' not in mr[which]:
                        response += " Did you know that " + mr[which]['byartist'] + " has released a new song?"
                    else:
                        response += " You can check out this cool song, " + mr[which]['name'] + ", by " + mr[which]['byartist']
        return response
    s = nlp.get_sentences(message)
    doc = spacy_nlp(message)
    for w in doc:
        print "(", w, w.dep_, w.pos_, w.head, ")"
    aiml_sent_type = []
    aiml_responses = []
    memory_responses = []
    sentence_types = []
    emotions = []
    for sentence in s:
        sentence_type = self.instant_classifier.classify(dialogue_act_features(sentence))
        sentence_types.append(sentence_type)
        polarity, subjective = pattern_en.sentiment(sentence)
        sent = pattern_en.parse(sentence, lemmata=True)
        sent = pattern_en.Sentence(sent)
        modality = pattern_en.modality(sent)
        mood = pattern_en.mood(sent)
        # Thresholds ordered from strongest to weakest so that every branch is reachable.
        if polarity > 0.8:
            emotions.append("SUPER HAPPY")
        elif polarity > 0.4:
            emotions.append("COOL")
        elif polarity > 0.3:
            emotions.append("GOOD SURPRISE")
        elif polarity < -0.7:
            emotions.append("ANGER")
        elif polarity < -0.4:
            emotions.append("FEAR")
        elif polarity < -0.1:
            emotions.append("SAD")
        else:
            emotions.append("NEUTER")
        print sentence_type, polarity, subjective, modality, mood
        if sentence_type not in ["whQuestion", "ynQuestion"]:
            try:
                aiml_sent_type_res = self.kernel.respond(sentence_type, sessionId)
            except:
                aiml_sent_type_res = ""
            aiml_sent_type.append(aiml_sent_type_res)
        verbs_subj = set()
        sentence = sentence[0].upper() + sentence[1:]
        doc = spacy_nlp(sentence)
        for possible_subject in doc:
            if (possible_subject.dep == nsubj or possible_subject.dep == nsubjpass) and possible_subject.head.pos == VERB:
                verbs_subj.add((possible_subject, possible_subject.head))
        try:
            aiml_response = self.kernel.respond(sentence, sessionId)
        except:
            aiml_response = ""
        aiml_responses.append(aiml_response)
        # MEMORY MODULE
        memory_msg = ""
        if sentence_type == "Statement":
            # insert into memory
            for i in verbs_subj:
                subjs = []
                subjects = [i[0]]
                for tok in i[0].children:
                    if tok.dep == conj:
                        subjects.append(tok)
                for subj in subjects:
                    predec = ""
                    for tok in subj.children:
                        if tok.dep_ == "poss" or tok.dep == amod:
                            predec += tok.lower_
                    if len(predec) > 0:
                        subjs.append(predec + " " + subj.lower_)
                    else:
                        subjs.append(subj.lower_)
                vb = i[1].lower_
                if vb not in memory[sessionId]:
                    memory[sessionId][vb] = dict()
                for subj in subjs:
                    for c in i[1].children:
                        if c.dep in [prep]:
                            memory[sessionId][vb][subj] = c.lower_ + " "
                            for c_prep in c.children:
                                if c_prep.dep in [dobj, pobj, attr]:
                                    memory[sessionId][vb][subj] += c_prep.text
                                    memory_responses.append(self.kernel.respond("memorate", sessionId))
                        elif c.dep in [dobj, pobj, attr]:
                            memory[sessionId][vb][subj] = c.text
                            memory_responses.append(self.kernel.respond("memorate", sessionId))
        elif sentence_type == "whQuestion":
            for i in verbs_subj:
                subjs = []
                subjects = [i[0]]
                for tok in i[0].children:
                    if tok.dep == conj:
                        subjects.append(tok)
                for subj in subjects:
                    predec = ""
                    for tok in subj.children:
                        if tok.dep_ == "poss" or tok.dep == amod:
                            predec += tok.lower_
                    if len(predec) > 0:
                        subjs.append(predec + " " + subj.lower_)
                    else:
                        subjs.append(subj.lower_)
                max_similarity = 0
                verb = i[1].lower_
                for j in memory[sessionId]:
                    p_word = spacy_nlp(j)
                    similarity = i[1].similarity(p_word[0])
                    if similarity > max_similarity:
                        max_similarity = similarity
                        verb = j
                if max_similarity > 0.5 and verb in memory[sessionId]:
                    num_subjs = len(subjs)
                    memory_msg = ""
                    for subj in subjs:
                        if subj in memory[sessionId][verb]:
                            toks = nlp.tokenize_text(subj)
                            memory_msg = ""
                            for t in toks:
                                if t in first_person:
                                    memory_msg += pron_translate[t] + " "
                                else:
                                    memory_msg += t + " "
                            num_subjs -= 1
                            if num_subjs > 2:
                                memory_msg += ", "
                            elif num_subjs == 1:
                                memory_msg += "and "
                    if len(memory_msg) > 0:
                        memory_msg += verb + " "
                        if num_subjs != len(subjs):
                            memory_msg += memory[sessionId][verb][subjs[-1]] + "."
            memory_responses.append(memory_msg)
    arr_response = []
    for i in aiml_sent_type:
        if len(i) > 0:
            arr_response.append(i)
    for i in aiml_responses:
        if len(i) > 0:
            arr_response.append(i)
    for i in memory_responses:
        if len(i) > 0:
            arr_response.append(i)
    if len(arr_response) == 0:
        data = search.search(message)
        snip = data['items'][0]['snippet']
        sents = nlp.get_sentences(snip)
        arr_response.append(sents[0])
    response = ""
    for i in emotions:
        try:
            emoi = self.kernel.respond(i, sessionId)
        except:
            emoi = None
        if emoi is not None:
            if random.randint(0, 100) < 50:
                response += " " + emoi + "."
                break
    for res in arr_response:
        if len(res) > 1:
            response += res + " "
    # generic response, if no response
    restoks = nlp.tokenize_text(response)
    if len(restoks) == 0:
        idx = random.randint(0, len(sentence_types) - 1)
        try:
            aiml_response = self.kernel.respond(sentence_types[idx], sessionId)
        except:
            aiml_response = ""
        response += aiml_response
    # polarity, subjective = pattern_en.sentiment(response)
    # sent = pattern_en.parse(sentence, lemmata=True)
    # sent = pattern_en.Sentence(sent)
    # modality = pattern_en.modality(sent)
    # mood = pattern_en.mood(sent)
    # sentence_type = self.instant_classifier.classify(dialogue_act_features(response))
    # print response, polarity, subjective, modality, mood
    return response
                dicWord[nn] = 1
        else:
            for w in chunk:
                if w.type == "JJ":
                    index = c.lower().find(w.string)
                    print index
                    print w
                    if index > 0 and judge(c, index):
                        c = c[:index + len(w.string)] + '</span>' + c[index + len(w.string):]
                        c = c[:index] + '<span class=*JJ* >' + c[index:]
        # print c
        c = ('<span class=*sentence* sentiment=*' + str(sentiment(sentence)) +
             '* positive=*' + str(positive(sentence)) +
             '* mood=*' + str(mood(sentence)) +
             '* modality=*' + str(modality(sentence)) + '*>' + c + "</span>")
        c = c.replace('"', '*')
        v.texts = v.texts + c
        # print c
        # pdb.set_trace()
        # print v.texts
    print v.date
    # print v.nouns
    # print v.texts
    print v.stars
    cur.execute('insert into wZwZcte4lcbu51NOzCjWbQ values("' + v.date + '","' + v.user + '","' + v.nouns + '","' + str(v.stars) + '" ,"' + v.texts + '")')
    # cur.execute('create table wordfre(word varchar(20) UNIQUE,uid integer)')

cur.close()
cx.commit()