def preprocess(self):
        """Load and preprocess the suspicious and the source document.

        For each of the two documents (``self.susp`` and ``self.src``):

        1. The file is read as UTF-8 and its full content is stored in
           ``self.susp_text`` / ``self.src_text``.
        2. ``Preprocessing.tokenize`` is called with the text and the
           matching ``*_offsets`` / ``*_sents`` containers; its return
           value is stored in ``self.susp_bow`` / ``self.src_bow``.
           (Presumably the offsets and sentence containers are filled in
           place by the tokenizer -- confirm against ``Preprocessing``.)
        3. ``Preprocessing.ss_treat`` is called with the bag-of-words,
           the offsets, ``self.min_sentlen`` and ``self.rssent``. Its
           return value is discarded, so it is expected to work in place.

        Raises:
            IOError / OSError: if either file cannot be opened or read.
            UnicodeDecodeError: if either file is not valid UTF-8.
        """
        # codecs.open is kept (instead of the builtin open) on purpose: it
        # reads the underlying file in binary mode and performs no newline
        # translation, so the text handed to the tokenizer is unchanged
        # from the original behaviour.
        # The ``with`` blocks guarantee the handles are released even when
        # reading/decoding fails; the files are no longer needed once the
        # whole text is in memory.
        with codecs.open(self.susp, 'r', 'utf-8') as susp_fp:
            self.susp_text = susp_fp.read()
        self.susp_bow = Preprocessing.tokenize(self.susp_text, self.susp_offsets, self.susp_sents)
        Preprocessing.ss_treat(self.susp_bow, self.susp_offsets, self.min_sentlen, self.rssent)

        with codecs.open(self.src, 'r', 'utf-8') as src_fp:
            self.src_text = src_fp.read()
        self.src_bow = Preprocessing.tokenize(self.src_text, self.src_offsets, self.src_sents)
        Preprocessing.ss_treat(self.src_bow, self.src_offsets, self.min_sentlen, self.rssent)