def generation(self):
    """Build the word graph from ``self.sentences``.

    Side effects (all on ``self``):
      - ``self.tokenized``: list of token lists, one per sentence.
      - ``self.types``: maps each word to its single most frequent POS tag
        (majority vote over every occurrence in the corpus).
      - ``self.nodes``: one ``Etiquette`` node per distinct word, each
        recording every (sentence id, word position) where it occurs and
        the ids of the nodes that follow it.

    Sentence and word positions are 1-based.
    """
    self.tokenized = [nltk.word_tokenize(sent) for sent in self.sentences]
    self.generate_average_position()

    # Phase 1: collect every POS tag observed for each word.
    self.types = {}
    tagger = Tagger(False)
    for tokens in self.tokenized:
        for word, tag in tagger.tag_sent(tokens):
            # setdefault replaces the original duplicated if/else branches.
            self.types.setdefault(word, []).append(tag)

    # Phase 2: reduce each tag list to the single most common tag.
    for word, tags in self.types.items():
        most_common_tag, _count = Counter(tags).most_common(1)[0]
        self.types[word] = most_common_tag

    # Phase 3: build the graph. New words get a fresh Etiquette node
    # (its constructor records the first sid/pid); repeated words have the
    # extra occurrence appended to the existing node.
    for num_sent, tokens in enumerate(self.tokenized, start=1):
        last = None
        for num_word, mot in enumerate(tokens, start=1):
            if not self.isWordIn(mot):
                node = Etiquette(mot, num_sent, num_word)
                self.nodes.append(node)
            else:
                node = self.get_node_with_value(mot)
                node.add_sid_pid(num_sent, num_word)
            # Link the previous word in this sentence to the current one.
            if num_word > 1:
                last.add_next(node.get_id())
            last = node