def __load_sentences(self):
    """Read all sentences from the parser and tokenize each one.

    Tweets are tokenized with twokenize (tweet-aware rules); everything
    else goes through NLTK's word_tokenize. Capitalization statistics are
    accumulated per token via __track_capitalization.

    Returns:
        tuple: (capitalization, tokenized_sentences) where
            capitalization is a defaultdict(set) populated by
            __track_capitalization, and tokenized_sentences is a list of
            token lists, one per input sentence.
    """
    tokenized_sentences = []
    # defaultdict(set) — the factory is the set constructor itself; no
    # partial() wrapper is needed.
    capitalization = defaultdict(set)
    parser = Parser()
    # 'source_kind' avoids shadowing the builtin `type`.
    source_kind, sentence = parser.next()
    # Parser signals exhaustion with a falsy sentence.
    while sentence:
        if source_kind == 'tweet':
            tokens = twokenize.tokenize(sentence)
        else:
            tokens = word_tokenize(sentence)
        self.__track_capitalization(capitalization, tokens)
        tokenized_sentences.append(tokens)
        source_kind, sentence = parser.next()
    return capitalization, tokenized_sentences