def __init__(self, cacm_words_path, common_words_filename) -> None:
    """Load the collection file and initialise the parsing state.

    Args:
        cacm_words_path: Path of the text file to parse. It is opened in
            text mode and read fully into memory as a list of lines.
        common_words_filename: Passed unchanged to ``Process_text``
            (presumably a common/stop-word list; confirm against that
            class).

    Raises:
        OSError: If ``cacm_words_path`` cannot be opened or read.
    """
    # Read everything up front; the file is closed as soon as the block exits.
    with open(cacm_words_path, 'r') as f:
        self.lines = f.readlines()

    # Starts at 0; presumably a cursor into self.lines used by later parsing.
    self.current_line_number = 0

    # Raw string so that \. \s and \d reach the regex engine verbatim instead
    # of being treated as (invalid) string escapes, which newer Python
    # versions warn about. Matches a line starting with ".I" followed by one
    # whitespace character and captures the following digits as group "id".
    self.document_begin_regex = re.compile(r'^\.I\s(?P<id>\d*)')

    # Field markers recognised in the input file.
    self.category_markers = [
        '.I', '.T', '.W', '.B', '.A', '.N', '.X', '.K', '.C',
    ]

    self.documents = []
    self.text_processor = Process_text(common_words_filename)