def sent_result_subjectivity(text):
    # Classify a single sentence as subjective/objective using a stored custom classifier.
    from nltk.data import load
    from nltk.tokenize import regexp

    word_tokenizer = regexp.WhitespaceTokenizer()
    # Tokenize and convert to lower case
    tokenized_text = [word.lower() for word in word_tokenizer.tokenize(text)]
    sentim_analyzer = load('files/sa_subjectivity.pickle')
    label = sentim_analyzer.classify(tokenized_text)
    return label
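
# A minimal usage sketch (illustrative only): assumes 'files/sa_subjectivity.pickle'
# is resolvable via the NLTK data path; nltk.data.load raises LookupError otherwise.
# The sample sentences and expected labels are assumptions, not guaranteed output.
print(sent_result_subjectivity("I absolutely loved this film."))   # typically 'subj'
print(sent_result_subjectivity("The film was released in 2009."))  # typically 'obj'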
def subjectivityScore(docs):
    # Input: iterable of spaCy Doc objects
    # Output: subjectivity score per document (fraction of its sentences classified as subjective)
    # Note: takes a few minutes the first time, to train the classifier; afterwards it takes a few seconds.
    from nltk.classify import NaiveBayesClassifier
    from nltk.data import load
    from nltk.sentiment.util import demo_subjectivity
    from nltk.tokenize import regexp

    word_tokenizer = regexp.WhitespaceTokenizer()
    try:
        sentiment = load('sa_subjectivity.pickle')
    except LookupError:
        sentiment = demo_subjectivity(NaiveBayesClassifier.train, True)

    subjectivities = []
    for doc in docs:
        tot, subj = 0, 0
        for sent in doc.sents:
            tot += 1
            # Tokenize and convert to lower case
            tokenized = [word.lower() for word in word_tokenizer.tokenize(sent.text)]
            if sentiment.classify(tokenized) == 'subj':
                subj += 1
        # Guard against documents with no detected sentences
        subjectivities.append(subj / tot if tot else 0.0)
    return subjectivities
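
# End-to-end sketch for subjectivityScore (illustrative only): assumes spaCy and the
# en_core_web_sm model are installed; any pipeline that sets sentence boundaries works.
# The sample texts are made up for demonstration.
import spacy

nlp = spacy.load("en_core_web_sm")
texts = [
    "I loved the film. The acting felt wonderfully sincere.",
    "The film runs 120 minutes. It was released in 2009.",
]
docs = list(nlp.pipe(texts))
for text, score in zip(texts, subjectivityScore(docs)):
    print(f"{score:.2f}  {text}")  # one score per document: fraction of 'subj' sentences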
def demo_sent_subjectivity(text):
    """
    Classify a single sentence as subjective or objective using a stored
    SentimentAnalyzer.

    :param text: a sentence whose subjectivity has to be classified.
    """
    from nltk.classify import NaiveBayesClassifier
    from nltk.data import load
    from nltk.sentiment.util import demo_subjectivity
    from nltk.tokenize import regexp

    word_tokenizer = regexp.WhitespaceTokenizer()
    try:
        sentim_analyzer = load('sa_subjectivity.pickle')
    except LookupError:
        print('Cannot find the sentiment analyzer you want to load.')
        print('Training a new one using NaiveBayesClassifier.')
        sentim_analyzer = demo_subjectivity(NaiveBayesClassifier.train, True)
    # Tokenize and convert to lower case
    tokenized_text = [word.lower() for word in word_tokenizer.tokenize(text)]
    print(sentim_analyzer.classify(tokenized_text))
import csv
import json
import re
import string

import nltk
import pandas as pd
import yaml
from nltk.tokenize import regexp

word_tokenizer = regexp.WhitespaceTokenizer()

# Initialise shared state
docID = ""
index_tracker = {}
input_dict = {}
df = pd.DataFrame(columns=index_tracker.keys())
column_name = []


def find_column_name(input_dict, column_name):
    # Recursively collect column names from a nested config dict:
    # a key is collected when its value contains a "find" key, and every
    # key nested directly under an "output" key is also collected.
    if isinstance(input_dict, dict):
        for key, value in input_dict.items():
            if isinstance(value, dict):
                if "find" in value:
                    column_name.append(key)
                if key == "output":
                    for i in value.keys():
                        column_name.append(i)
                # print(key, column_name)
                find_column_name(value, column_name)
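
# Small illustration of find_column_name (the nested config below is hypothetical):
# keys whose value contains a "find" key are collected, as are all keys nested
# directly under an "output" key, at any depth.
sample_config = {
    "patient_id": {"find": "id", "type": "string"},
    "output": {"name": {}, "dob": {}},
    "metadata": {
        "visit_date": {"find": "date"},
    },
}
cols = []
find_column_name(sample_config, cols)
print(cols)  # ['patient_id', 'name', 'dob', 'visit_date']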