Esempio n. 1
0
class TextParserTC(unittest.TestCase):
    def setUp(self):
        self.parser = TextParser()

    def testTitleGuess(
            self
    ):  #XXX: complete this with PDF/PS files before commit time !!!
        """Make sure the title is the filename when we treat a text file
           or no title could be found
        """
        title, text, links, offset = self.parser.parseFile(
            join(DATADIR, 'latin1.txt'), 'latin1.txt', 'ISO-8859-1')
        self.assertEquals(title, 'latin1.txt')
        self.assertEquals(normalizeText(text), "c'est l'ete")
        self.assertEquals(links, [])
        # Now, PS file
        title, text, links, offset = self.parser.parseFile(
            join(DATADIR, 'utf8.ps'), 'utf8.ps', 'UTF-8')
        self.assertEquals(title, 'utf8.ps')
        self.assertEquals(links, [])
        # The PDF (yes, it's important to test this too)
        title, text, links, offset = self.parser.parseFile(
            join(DATADIR, 'utf8.pdf'), 'utf8.pdf', 'UTF-8')
        self.assertEquals(title, 'utf8.pdf')
        self.assertEquals(links, [])
Esempio n. 2
0
class TextParserTC(unittest.TestCase):

    def setUp(self):
        self.parser = TextParser()

    def testTitleGuess(self): #XXX: complete this with PDF/PS files before commit time !!!
        """Make sure the title is the filename when we treat a text file
           or no title could be found
        """
        title, text, links, offset = self.parser.parseFile(join(DATADIR, 'latin1.txt'), 'latin1.txt', 'ISO-8859-1')
        self.assertEquals(title, 'latin1.txt')
        self.assertEquals(normalizeText(text), "c'est l'ete")
        self.assertEquals(links, [])
        # Now, PS file
        title, text, links, offset = self.parser.parseFile(join(DATADIR, 'utf8.ps'), 'utf8.ps', 'UTF-8')
        self.assertEquals(title, 'utf8.ps')
        self.assertEquals(links, [])
        # The PDF (yes, it's important to test this too)
        title, text, links, offset = self.parser.parseFile(join(DATADIR, 'utf8.pdf'), 'utf8.pdf', 'UTF-8')
        self.assertEquals(title, 'utf8.pdf')
        self.assertEquals(links, [])
Esempio n. 3
0
 def setUp(self):
     """Build a new TextParser and store it on the instance as ``parser``."""
     text_parser = TextParser()
     self.parser = text_parser
Esempio n. 4
0
 def setUp(self):
     """Instantiate TextParser and keep the result in ``self.parser``."""
     new_parser = TextParser()
     self.parser = new_parser
Esempio n. 5
0
 def getParser(self):
     """Return a new parser chosen from ``self.OUTPUT_TYPE``.

     An ``OUTPUT_TYPE`` equal to ``'html'`` yields an ``HTMLParser``
     instance; any other value yields a ``TextParser`` instance.
     """
     wants_html = self.OUTPUT_TYPE == 'html'
     # Only the selected class name is looked up, then called with no
     # arguments.
     parser_factory = HTMLParser if wants_html else TextParser
     return parser_factory()
Esempio n. 6
0
 def getParser(self):
     """Return a newly constructed TextParser."""
     parser = TextParser()
     return parser