def __init__(self, cacm_words_path, common_words_filename):
        """Read the collection file into memory and set up parsing state.

        Args:
            cacm_words_path: Path of the text file to read. It is opened in
                text mode and every line is kept (with its line ending) in
                ``self.lines``.
            common_words_filename: Forwarded unchanged to ``Process_text``
                (presumably a stop-word list -- confirm against that class).

        Raises:
            OSError: If ``cacm_words_path`` cannot be opened for reading.
        """
        # NOTE(review): no explicit encoding is given, so the platform
        # default is used -- confirm the file's actual encoding.
        with open(cacm_words_path, 'r') as f:
            self.lines = f.readlines()
        # Index into ``self.lines``; starts at the first line.
        self.current_line_number = 0

        # Matches a line beginning with ".I" followed by one whitespace
        # character, capturing the digits after it as the named group "id".
        # A raw string is required so that ``\.``, ``\s`` and ``\d`` reach the
        # regex engine verbatim instead of being treated as (invalid) string
        # escape sequences.
        self.document_begin_regex = re.compile(r'^\.I\s(?P<id>\d*)')
        # Two-character markers recognised in the file.
        self.category_markers = [
            '.I', '.T', '.W', '.B', '.A', '.N', '.X', '.K', '.C'
        ]

        # Starts empty; nothing is added to it in this method.
        self.documents = []
        self.text_processor = Process_text(common_words_filename)