def prepare_stopwords():
    """Return spaCy's stop-word set with negation-related words removed.

    Negations ("not", "never", contracted forms like "don't"/"dont", and
    negation-adjacent words such as "rarely" or "without") carry sentiment
    signal, so they must survive stop-word filtering.

    Returns:
        set[str]: a new set — spaCy's STOP_WORDS minus the negation words.
        The shared spaCy STOP_WORDS set itself is NOT mutated.
    """
    # Negation vocabulary, both apostrophe-less and apostrophe forms.
    # frozenset: membership is O(1) and the collection is immutable.
    NEGATE = frozenset([
        "aint", "arent", "cannot", "cant", "couldnt", "darent", "didnt",
        "doesnt", "ain't", "aren't", "can't", "couldn't", "daren't",
        "didn't", "doesn't", "dont", "hadnt", "hasnt", "havent", "isnt",
        "mightnt", "mustnt", "neither", "don't", "hadn't", "hasn't",
        "haven't", "isn't", "mightn't", "mustn't", "neednt", "needn't",
        "never", "none", "nope", "nor", "not", "nothing", "nowhere", "no",
        "oughtnt", "shant", "shouldnt", "uhuh", "wasnt", "werent",
        "oughtn't", "shan't", "shouldn't", "uh-uh", "wasn't", "weren't",
        "without", "wont", "wouldnt", "won't", "wouldn't", "rarely",
        "seldom", "despite",
    ])
    # Single set difference replaces the original copy-then-remove loop,
    # which performed a linear list lookup per stop word. Result is
    # identical: every stop word not in NEGATE, as a fresh set.
    return STOP_WORDS - NEGATE
import spacy
from spacy.lang.en.stop_words import STOP_WORDS

# Load spaCy's medium English pipeline (tokenizer/tagger/lemmatizer with
# word vectors). Loaded once at import time and shared by nlp0/nlp1.
nlp = spacy.load('en_core_web_md')

# Corpus-specific noise tokens to treat as stop words.
# NOTE(review): this mutates spaCy's shared, module-level STOP_WORDS set —
# a global side effect that also affects prepare_stopwords() and any other
# code importing STOP_WORDS after this module runs.
domain_stop_words = ['chapter', '<', '>', ';', 'vinegar', 'of', '%']
for word in domain_stop_words:
    STOP_WORDS.add(word)

# Independent copy that deliberately keeps 'other' as a meaningful token;
# used by nlp1 below, where 'other' gets special handling.
STOP_WORDS1 = STOP_WORDS.copy()
STOP_WORDS1.discard('other')


def nlp0(sentence):
    """Lower-case and lemmatize *sentence*, dropping stop words and punctuation.

    Filtering uses each token's own `is_stop` flag (spaCy's stop-word
    logic, which reflects the augmented STOP_WORDS set above).

    Returns:
        list: lemma strings for the surviving tokens, in sentence order.
    """
    sentence = sentence.lower()
    word_list = [
        token.lemma_
        for token in nlp(sentence)
        if not token.is_stop and not token.is_punct
    ]
    return word_list


def nlp1(sentence):
    """Lemmatize *sentence* like nlp0, but filter against STOP_WORDS1.

    Differences from nlp0: membership is tested on the raw token text
    against STOP_WORDS1 (so 'other' is kept rather than dropped), and
    lemmas are explicitly coerced to str. The loop below begins a second
    pass that treats 'other' specially.

    NOTE(review): this function continues beyond the visible chunk; the
    semantics of the 'other' handling cannot be confirmed from here.
    """
    sentence = sentence.lower()
    word_list = [
        str(token.lemma_)
        for token in nlp(sentence)
        if str(token) not in STOP_WORDS1 and not token.is_punct
    ]
    word_list1 = []
    flag = 0  # presumably marks that an 'other' was seen — TODO confirm downstream
    for i in word_list:
        if i == 'other':