def __init__(self, tokenized, threshold, ignore_list):
    """Build simplification state for a tokenized sentence.

    Args:
        tokenized: list of word tokens for the sentence.
        threshold: minimum complexity score for a token to count as a
            simplification candidate (strictly greater-than).
        ignore_list: token indexes that must never be simplified. Pass an
            empty list (or None) to auto-derive one from POS tags.
    """
    self.threshold = threshold
    self.tokenized = tokenized
    self.indexes = list(enumerate(self.tokenized))
    self.pos_tags = nltk.pos_tag(self.tokenized)
    # Robustness fix: `not ignore_list` also handles None (the original
    # `== []` test stored None and crashed later on `not in None`).
    if not ignore_list:
        # Auto-ignore tokens whose Penn Treebank tag contains 'P'
        # (pronouns, proper nouns, particles, etc. — presumably words
        # that should not be swapped for synonyms; TODO confirm intent).
        self.ignore_index = [
            i for i, (_, tag) in enumerate(self.pos_tags) if 'P' in tag
        ]
    else:
        self.ignore_index = ignore_list
    # Pair each token index with its complexity score, keep the ones over
    # the threshold that are not ignored, hardest first.
    scores = complex_word.get_complex_words(self.tokenized)
    ignored = set(self.ignore_index)  # O(1) membership instead of list scan
    self.complex_words = sorted(
        [
            (i, score)
            for (i, _), score in zip(self.indexes, scores)
            if score > self.threshold and i not in ignored
        ],
        key=lambda pair: pair[1],
        reverse=True,
    )
def make_simplification(self, synonym, index):
    """Splice *synonym* (a list of tokens) in place of the token at *index*
    and recompute POS tags and the ranked complex-word candidates.

    Args:
        synonym: replacement tokens for the single word at ``index``.
        index: position in ``self.tokenized`` of the word being replaced.
    """
    # Bug fix: copy before editing. The original aliased self.tokenized,
    # mutated it via del/insert, and its final reassignment was a no-op —
    # any caller still holding the list saw it silently change.
    tokens = list(self.tokenized)
    # Slice splice replaces the del + insert loop in one step.
    tokens[index:index + 1] = list(synonym)
    # NOTE(review): a multi-word synonym shifts every later token, so
    # previously recorded ignore indexes go stale — confirm add_ignore
    # is meant to record the pre-shift index.
    self.add_ignore(index)
    self.tokenized = tokens
    self.indexes = list(enumerate(self.tokenized))
    self.pos_tags = nltk.pos_tag(self.tokenized)
    # Recompute candidates exactly as in __init__: over-threshold,
    # not ignored, hardest first.
    scores = complex_word.get_complex_words(self.tokenized)
    ignored = set(self.ignore_index)  # O(1) membership instead of list scan
    self.complex_words = sorted(
        [
            (i, score)
            for (i, _), score in zip(self.indexes, scores)
            if score > self.threshold and i not in ignored
        ],
        key=lambda pair: pair[1],
        reverse=True,
    )