コード例 #1
0
 def __init__(self, tokenized, threshold, ignore_list):
     """Store the tokens and rank the words scoring above ``threshold``.

     Args:
         tokenized: Sequence of word tokens making up the sentence.
         threshold: Score a token must strictly exceed to be kept in
             ``self.complex_words``.
         ignore_list: Token positions to exclude. When it equals ``[]``
             the positions are derived from the POS tags instead (every
             token whose tag contains the letter ``'P'``); otherwise the
             object is stored as-is, without copying.

     After construction ``self.complex_words`` holds ``(position, score)``
     pairs, highest score first, for tokens above the threshold whose
     position is not in ``self.ignore_index``.
     """
     self.threshold = threshold
     self.tokenized = tokenized
     self.indexes = list(enumerate(self.tokenized))
     self.pos_tags = nltk.pos_tag(self.tokenized)

     if ignore_list == []:
         # No explicit ignore list: skip every token whose POS tag
         # contains a 'P'.
         self.ignore_index = [
             position
             for (position, _token), (_word, tag)
             in zip(self.indexes, self.pos_tags)
             if 'P' in tag
         ]
     else:
         self.ignore_index = ignore_list

     # Presumably one score per token, in token order -- confirm against
     # complex_word.get_complex_words.
     scores = complex_word.get_complex_words(self.tokenized)

     ranked = []
     for (position, _token), score in zip(self.indexes, scores):
         if score > self.threshold and position not in self.ignore_index:
             ranked.append((position, score))
     # list.sort is stable, so equally scored tokens keep sentence order.
     ranked.sort(key=lambda pair: pair[1], reverse=True)
     self.complex_words = ranked
コード例 #2
0
    def make_simplification(self, synonym, index):
        """Replace the token at ``index`` with the words of ``synonym``.

        The token list is edited in place, every inserted word is
        registered through ``self.add_ignore`` so it is not picked as a
        candidate again, and ``indexes``, ``pos_tags`` and
        ``complex_words`` are rebuilt from the new token list.

        Args:
            synonym: Iterable of replacement words (one or more tokens).
            index: Position of the token to replace; expected to be a
                non-negative position into ``self.tokenized``.

        Raises:
            IndexError: If ``index`` is outside ``self.tokenized``.
        """
        words = list(synonym)
        tokens = self.tokenized

        del tokens[index]
        for offset, word in enumerate(words):
            tokens.insert(index + offset, word)

        # A replacement that is not exactly one word moves every later
        # token, so previously ignored positions have to move with them.
        # Assumes self.ignore_index is a list of integer positions, as
        # built in __init__; updated in place so the list object is kept.
        shift = len(words) - 1
        if shift:
            self.ignore_index[:] = [
                position + shift if position > index else position
                for position in self.ignore_index
                # With an empty replacement the slot itself disappears.
                if words or position != index
            ]

        # Protect each inserted word at its own position; the original
        # registered `index` for every word, leaving the tail of a
        # multi-word replacement open to further simplification.
        for offset in range(len(words)):
            self.add_ignore(index + offset)

        self.tokenized = tokens
        self.indexes = list(enumerate(self.tokenized))
        self.pos_tags = nltk.pos_tag(self.tokenized)

        # Presumably one score per token, in token order -- confirm
        # against complex_word.get_complex_words.
        scores = complex_word.get_complex_words(self.tokenized)
        self.complex_words = sorted(
            (
                (position, score)
                for (position, _token), score in zip(self.indexes, scores)
                if score > self.threshold
                and position not in self.ignore_index
            ),
            key=lambda pair: pair[1],
            reverse=True,
        )