def preprocess(self, force=False):
    """Run the preprocessing step and cache its result on the instance.

    A ``PreProcessor`` is built from ``self._content`` and ``self.base_url``;
    the pair returned by its ``process()`` call is stored in ``self._title``
    and ``self._html``. The work is skipped when ``self._html`` is already
    set, unless ``force`` is truthy.

    Args:
        force: When truthy, rerun preprocessing even if a cached result
            exists. Any truthy value is honoured, not only the literal
            ``True``.

    Returns:
        None. Results are written to ``self._title`` and ``self._html``.
    """
    # Reuse the cached result unless the caller explicitly asks for a rerun.
    if not force and self._html is not None:
        return

    preprocessor = PreProcessor(self._content, base_url=self.base_url)
    self._title, self._html = preprocessor.process()
def testPreprocess(self):
    """Check that ``PreProcessor.process()`` returns the expected title and a non-empty body.

    Builds a ``PreProcessor`` from the fixture's ``self.html`` and
    ``self.base_url``, then verifies that the returned title contains the
    expected marker string and that the returned body's ``text`` is not empty.
    """
    pre_process = PreProcessor(self.html, self.base_url)
    title, body = pre_process.process()
    # assertIn reports both the expected substring and the actual title on
    # failure, whereas assertTrue(... in ...) only reports "False is not true".
    self.assertIn(u'百度', title)
    # The processed body must carry some text.
    self.assertGreater(len(body.text), 0)