class TextParserTC(unittest.TestCase):
    """Test case exercising ``TextParser.parseFile`` on the data fixtures."""

    def setUp(self):
        """Create a fresh ``TextParser`` for each test."""
        self.parser = TextParser()

    def testTitleGuess(self):
        """Check that the title falls back to the file name.

        This is expected when a plain text file is parsed, or when no
        title could be found in the document.
        """
        # XXX: the PS and PDF cases below only check the title and the
        # links; the extracted text is not asserted for them.
        title, text, links, _offset = self.parser.parseFile(
            join(DATADIR, 'latin1.txt'), 'latin1.txt', 'ISO-8859-1')
        # assertEqual replaces the deprecated assertEquals alias, which
        # was removed from unittest in Python 3.12.
        self.assertEqual(title, 'latin1.txt')
        self.assertEqual(normalizeText(text), "c'est l'ete")
        self.assertEqual(links, [])

        # Now, PS file
        title, _text, links, _offset = self.parser.parseFile(
            join(DATADIR, 'utf8.ps'), 'utf8.ps', 'UTF-8')
        self.assertEqual(title, 'utf8.ps')
        self.assertEqual(links, [])

        # The PDF (yes, it's important to test this too)
        title, _text, links, _offset = self.parser.parseFile(
            join(DATADIR, 'utf8.pdf'), 'utf8.pdf', 'UTF-8')
        self.assertEqual(title, 'utf8.pdf')
        self.assertEqual(links, [])
class TextParserTC(unittest.TestCase):
    """Test case exercising ``TextParser.parseFile`` on the data fixtures."""

    def setUp(self):
        """Create a fresh ``TextParser`` for each test."""
        self.parser = TextParser()

    def testTitleGuess(self):
        """Check that the title falls back to the file name.

        This is expected when a plain text file is parsed, or when no
        title could be found in the document.
        """
        # XXX: the PS and PDF cases below only check the title and the
        # links; the extracted text is not asserted for them.
        title, text, links, _offset = self.parser.parseFile(
            join(DATADIR, 'latin1.txt'), 'latin1.txt', 'ISO-8859-1')
        # assertEqual replaces the deprecated assertEquals alias, which
        # was removed from unittest in Python 3.12.
        self.assertEqual(title, 'latin1.txt')
        self.assertEqual(normalizeText(text), "c'est l'ete")
        self.assertEqual(links, [])

        # Now, PS file
        title, _text, links, _offset = self.parser.parseFile(
            join(DATADIR, 'utf8.ps'), 'utf8.ps', 'UTF-8')
        self.assertEqual(title, 'utf8.ps')
        self.assertEqual(links, [])

        # The PDF (yes, it's important to test this too)
        title, _text, links, _offset = self.parser.parseFile(
            join(DATADIR, 'utf8.pdf'), 'utf8.pdf', 'UTF-8')
        self.assertEqual(title, 'utf8.pdf')
        self.assertEqual(links, [])
def setUp(self):
    """Build a new ``TextParser`` and store it on ``self.parser``."""
    fresh_parser = TextParser()
    self.parser = fresh_parser
def getParser(self):
    """Return a new parser instance selected by ``self.OUTPUT_TYPE``.

    An ``HTMLParser`` is created when the output type equals ``'html'``;
    any other value yields a ``TextParser``.
    """
    wants_html = self.OUTPUT_TYPE == 'html'
    # Only the selected class name is looked up, as with an if/else.
    parser_class = HTMLParser if wants_html else TextParser
    return parser_class()
def getParser(self):
    """Return a newly created ``TextParser`` instance."""
    text_parser = TextParser()
    return text_parser