def removeNegationsPipeline(sentence: str, contractions: dict):
    """Run the full text-processing chain on a sentence and strip negations.

    The sentence is tokenized, its contractions are expanded, the tokens are
    part-of-speech tagged, stopwords are dropped, and the remaining tagged
    tokens are handed to ``removeNegations``.

    Parameters
    ----------
    sentence : str
        Sentence we wish to remove the negations out of.
    contractions : dict
        Dictionary that maps contractions to their expansions.

    Returns
    -------
    list
        Whatever ``removeNegations`` returns for the tagged tokens; described
        by the original author as a list of WordNet part-of-speech tokens,
        each of the form (word, part of speech, version).
    """
    # Tokenize first, then expand contractions on the token list.
    expanded_words = expandContractions(tp.tokenize_words(sentence), contractions)
    # POS-tag the expanded tokens and filter stopwords from the tagged list.
    tagged_words = tp.remove_pos_stopwords(tp.simple_pos_tag(expanded_words))
    return removeNegations(tagged_words)
def swn_pipeline(s: str, label_diff_thresh=0, contractions=None,
                 emoti_dict=None, special=None, count_nouns=True):
    """Pipeline that takes a sentence string and returns its sentiment label.

    Parameters
    ----------
    s : str
        Sentence to find the sentiment of.
    label_diff_thresh : int, default 0
        How far the positive and negative scores must differ for the
        sentence to be considered not neutral (forwarded to ``swn_label``
        as ``diff_thresh``).
    contractions : dict, optional
        Dictionary of contractions mapped to their expanded forms. When
        ``None``, contractions are not expanded and negations are not
        removed.
    emoti_dict : dict, optional
        Dictionary of emoticons mapped to their label (forwarded to
        ``extra_preprocessing``).
    special : dict, optional
        Dictionary of specialized lexicon used to score words on top of
        SentiWordNet. ``None`` (the default) is treated as an empty dict;
        a fresh dict is created on every call so no state is shared
        between calls.
    count_nouns : bool, default True
        Whether or not to include nouns in the scoring.

    Returns
    -------
    tuple
        If preprocessing alone already produced a label ("0", "1" or "-1"),
        ``(label, [])``. Otherwise the result of ``swn_label``, documented
        by the original author as (sentiment label, list of the words with
        their scores).
    """
    # Avoid a shared mutable default: build a new dict per call.
    if special is None:
        special = {}

    # Preliminary pre-processing (emoticons, phrases, etc.).
    s = extra_preprocessing(s, emoti_dict)

    # Pre-processing may short-circuit straight to a label.
    if s in ("0", "1", "-1"):
        return (s, [])

    # Text processing.
    if contractions is None:
        # Plain path: tokenize, POS-tag, drop stopwords.
        tokens = tp.tokenize_words(s)
        pos_tokens = tp.simple_pos_tag(tokens)
        pos_tokens = tp.remove_pos_stopwords(pos_tokens)
    else:
        # Contractions supplied: expand them and remove negations as well.
        pos_tokens = rneg.removeNegationsPipeline(s, contractions)

    return swn_label(pos_tokens, diff_thresh=label_diff_thresh,
                     special=special, count_nouns=count_nouns)