def run_text_analysis(self):
    """Run the full battery of text statistics over ``self.text``.

    Populates: word_distribution, unigrams (top 100 words), bigram/trigram
    collocations, total and unique word counts, and the person / location /
    organization / combined named-entity lists.  All heavy lifting is
    delegated to the ``textutility`` module; this method only stores results
    on the instance.
    """
    self.word_distribution = textutility.get_word_distribution(self.text)
    # BUG FIX: dict views are not subscriptable in Python 3 — materialize
    # the keys before slicing the top 100.  (list(...) also works on Py2.)
    self.unigrams = list(self.word_distribution.keys())[:100]
    self.bigrams = textutility.get_bigram_collocation(self.text)
    self.trigrams = textutility.get_trigram_collocation(self.text)
    self.length = len(textutility.get_words(self.text))
    self.unique_length = len(textutility.unique_words(self.text))
    self.person_entities = textutility.get_person_entities(self.text)
    self.location_entities = textutility.get_location_entities(self.text)
    self.organization_entities = textutility.get_organization_entities(self.text)
    self.all_named_entities = textutility.get_all_named_entities(self.text)
def calculate_sentiment(self):
    """Score the sentiment of ``self.text`` sentence by sentence.

    For each sentence, every non-stopword token is looked up in SentiWordNet
    (via its first WordNet synset) and its positive/negative/objective scores
    are accumulated.  A sliding 3-token window of negation markers flips the
    polarity of words that follow a negation.  Per-sentence totals are folded
    into ``self.total_scores`` and the list of per-sentence aggregates is
    stored under ``self.total_scores['by_sentences']``.

    Relies on instance state: ``self.text``, ``self.entities``,
    ``self.english_stops``, ``self.negation_words``, ``self.total_scores``,
    and the helper ``self.wordnet_sanitize``.
    """
    # Per-sentence accumulators, reset at the end of each sentence.
    sentence_positive = 0
    sentence_negative = 0
    sentence_objective = 0
    sentence_subjective = 0
    sentence_total = 0
    sentence_sentiments = []
    # NOTE(review): entity_sentiments is filled below but never stored on
    # self or returned — looks like dead state; confirm against callers.
    entity_sentiments = {}
    for entity in self.entities:
        entity_sentiments[entity] = 0
    #build sentiment score
    # past_words holds negation flags for the last 3 tokens seen.
    past_words = []
    article_sentences = textutility.sentence_tokenize(self.text)
    pos_tokens = textutility.pos_tag(article_sentences)
    for idx, sentence_token in enumerate(pos_tokens):
        # Lowercased word list for the entity-mention check at sentence end.
        sentence_words = textutility.get_lowercase(textutility.get_words(article_sentences[idx]))
        # The pattern.en sentiment() function returns a (polarity, subjectivity)-tuple
        # for the given sentence, based on the adjectives it contains, where polarity
        # is a value between -1.0 and +1.0 and subjectivity between 0.0 and 1.0.
        # NOTE(review): the return value is discarded — this call appears to be
        # a no-op (or leftover debugging); verify whether it was meant to be kept.
        patternsent(article_sentences[idx])
        for token in sentence_token:
            word = token[0].strip()
            # Skip stopwords entirely — they get no score and no negation flag
            # until the negation bookkeeping below.
            if word in self.english_stops:
                continue;
            # Map the (word, POS-tag) pair to a WordNet-compatible (lemma, pos).
            sanitized = self.wordnet_sanitize(token[0], token[1])
            if(sanitized[1] is None):
                word_synset = wn.synsets(sanitized[0])
            else:
                word_synset = wn.synsets(sanitized[0], sanitized[1])
            if word_synset:
                # Take the first (most common) sense only.
                word_synset = word_synset[0]
                sentiment_synset = swn.senti_synset(word_synset.name())
                if sentiment_synset:
                    # NOTE(review): plain assignment (not +=) — only the last
                    # scored token in the sentence survives into the aggregate
                    # below; confirm this overwrite is intended.
                    sentence_total = ((sentiment_synset.pos_score() - sentiment_synset.neg_score()) * (1 - sentiment_synset.obj_score())) #weight subjective words
                    # If any of the previous 3 tokens was a negation, swap the
                    # positive/negative contributions and invert the polarity.
                    if(True in past_words):
                        sentence_positive += sentiment_synset.neg_score()
                        sentence_negative += sentiment_synset.pos_score()
                        sentence_total = sentence_total*-1
                    else:
                        sentence_positive += sentiment_synset.pos_score()
                        sentence_negative += sentiment_synset.neg_score()
                    sentence_objective += sentiment_synset.obj_score()
                    sentence_subjective += (1 - sentiment_synset.obj_score())
            # Record whether THIS token is a negation word (or ends in "n't"),
            # for use when scoring the NEXT tokens — hence updated after scoring.
            negation = False
            if(word in self.negation_words or (len(word) > 2 and word[-3:] == "n't")):
                negation = True
            past_words.append(negation)
            # Keep the negation window at most 3 tokens wide.
            if(len(past_words) > 3):
                past_words.pop(0)
        # Fold this sentence's scores into the document-level totals.
        self.total_scores['positive'] += sentence_positive
        self.total_scores['negative'] += sentence_negative
        self.total_scores['objective'] += sentence_objective
        self.total_scores['subjective'] += sentence_subjective
        self.total_scores['aggregate'] += sentence_total
        sentence_sentiments.append(sentence_total)
        # Attribute the sentence's aggregate score to any entity mentioned in it.
        for entity in self.entities:
            if entity in sentence_words:
                entity_sentiments[entity] += sentence_total
        # Reset per-sentence accumulators for the next iteration.
        sentence_positive = 0
        sentence_negative = 0
        sentence_objective = 0
        sentence_subjective = 0
        sentence_total = 0
    self.total_scores['by_sentences'] = sentence_sentiments