def __init__(self, data, remove_stop_word=False, lemmatize=True):
    """Initialize graph state and preprocess the input into clean sentences.

    Args:
        data: raw input text handed to ProcessData for cleaning.
        remove_stop_word: forwarded to ProcessData; drop stop words when True.
        lemmatize: forwarded to ProcessData; lemmatize tokens when True.
    """
    self.sentences = []  # placeholder; replaced by the cleaned sentences below
    self.graph = {}      # node adjacency, populated elsewhere
    self.PRI = {}        # per-node scores, populated elsewhere
    # NOTE(review): ProcessData's positional flags are opaque from here;
    # "Stanford" presumably selects a tokenizer backend — confirm upstream.
    cleaner = ProcessData(data, "Stanford", False, lemmatize, remove_stop_word, False)
    self.sentences = cleaner.clean_sentences()
def __init__(self, data):
    """Collect unique tokenized sentences and precompute word probabilities.

    Args:
        data: iterable of raw text units; each is split into sentences
            with sent_tokenize and deduplicated into self.data.
    """
    self.data = []
    for sentence in data:
        for s in sent_tokenize(sentence):
            # BUG FIX: the original appended the whole `tokens` list even
            # though membership was checked for the single sentence `s`,
            # so self.data filled with duplicate lists and the string
            # membership test never matched. Append the sentence itself.
            if s not in self.data:
                self.data.append(s)
    self.sentence_weights = {}  # filled later by the scoring pass
    data_processor = ProcessData(data)
    # Clean first, then strip tags from the cleaned sentences.
    self.sentences = data_processor.remove_tags(
        data_processor.clean_sentences())
    self.probabilities = self._get_probabilities(self.sentences)