Example #1
    def test_comment(self):
        # Tokenizing a lone HTML comment should still produce at least one token.
        lexer = Lexer()
        tokens = lexer.tokenize(
            '<!-- In the interest of restricting article length, please limit this section to '
            'two or three short paragraphs and add any substantial information to the main Issues '
            'in anarchism article. Thank you. -->')
        logging.info(tokens)
        self.assertGreater(len(tokens), 0)
Example #2
    def test_tokenize(self, name='wikitext'):
        """Tokenize a sample wikitext file and check that tokens are produced."""
        # DATA_FOLDER is expected to be a pathlib.Path defined at module level.
        with (DATA_FOLDER / name).open(encoding="utf8") as f:
            text = f.read()
        lexer = Lexer()
        tokens = lexer.tokenize(text)
        logging.info(tokens)
        logging.info('TEXT_LENGTH: %d', len(text))
        self.assertGreater(len(tokens), 0)
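
Both examples are method excerpts and omit their surrounding module. A minimal scaffold to run them might look like the sketch below; the import path for Lexer and the location of DATA_FOLDER are not shown in the original snippets, so both are marked as assumptions here.

    import logging
    import unittest
    from pathlib import Path

    # Hypothetical import: the excerpts do not show which module defines Lexer.
    # from mylexer import Lexer

    # Assumed layout: sample files such as 'wikitext' live in a sibling 'data' directory.
    DATA_FOLDER = Path(__file__).parent / 'data'

    class LexerTest(unittest.TestCase):
        # test_comment and test_tokenize from the examples above go here.
        pass

    if __name__ == '__main__':
        logging.basicConfig(level=logging.INFO)
        unittest.main()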