Beispiel #1
0
    def test_make_table(self):
        """Check the labels make_table places in column 0 and in row 0.

        Parses the ``plan.pdf`` fixture, keeps the horizontal text lines
        accepted by ``mensa2json.accept_text`` and asserts that the table
        built from them carries the expected labels in its first column
        and in the first cell of columns 0 through 5.
        """
        testFn = os.path.join(TEST_DIR, 'plan.pdf')
        with open(testFn, 'rb') as testf:
            objs = mensa2json.analyze_pages(mensa2json.parsePDF(testf))
        # filter() yields a one-shot iterator on Python 3; materialize it so
        # make_table gets a real list, exactly as it did on Python 2.
        texts = list(filter(mensa2json.accept_text,
                            objs[pdfminer.layout.LTTextLineHorizontal]))
        t = mensa2json.make_table(texts)

        # First Column
        first_column = [
            u'Essen I',
            u'Hauptkomponente',
            u'Essen II',
            u'Hauptkomponente',
            u'Beilagenauswahl',
            u'Essen I und II',
            u'Eintöpfe',
            u'Pfanne',
            u'Aktionsstand',
            u'Wok',
            u'Gratin',
        ]
        for row, label in enumerate(first_column):
            assert label in t[0][row].get_text(), 'column 0, row %d' % row

        # Upper-most text (~= first row)
        first_row = [
            u'Essen I',
            u'Montag',
            u'Dienstag',
            u'Mittwoch',
            u'Donnerstag',
            u'Freitag ',
        ]
        for col, label in enumerate(first_row):
            assert label in t[col][0].get_text(), 'column %d, row 0' % col
Beispiel #2
0
    def test_make_table(self):
        """Verify the header column and header row of the parsed table.

        The ``plan.pdf`` fixture is run through ``parsePDF`` and
        ``analyze_pages``; the accepted horizontal text lines are handed to
        ``make_table`` and the resulting ``t[column][row]`` cells are
        checked against the expected labels.
        """
        testFn = os.path.join(TEST_DIR, 'plan.pdf')
        with open(testFn, 'rb') as testf:
            objs = mensa2json.analyze_pages(mensa2json.parsePDF(testf))
        # Build a concrete list: on Python 3 a bare filter() object can only
        # be iterated once, whereas Python 2 handed make_table a list.
        lines = objs[pdfminer.layout.LTTextLineHorizontal]
        texts = list(filter(mensa2json.accept_text, lines))
        t = mensa2json.make_table(texts)

        def cell_text(col, row):
            # Text content of the table cell at the given column/row.
            return t[col][row].get_text()

        # First Column
        expected_column = (
            u'Essen I', u'Hauptkomponente',
            u'Essen II', u'Hauptkomponente',
            u'Beilagenauswahl', u'Essen I und II',
            u'Eintöpfe', u'Pfanne', u'Aktionsstand', u'Wok', u'Gratin',
        )
        for row, label in enumerate(expected_column):
            assert label in cell_text(0, row), 'column 0, row %d' % row

        # Upper-most text (~= first row)
        expected_row = (
            u'Essen I', u'Montag', u'Dienstag',
            u'Mittwoch', u'Donnerstag', u'Freitag ',
        )
        for col, label in enumerate(expected_row):
            assert label in cell_text(col, 0), 'column %d, row 0' % col
Beispiel #3
0
    def test_analyze_pages(self):
        """Check the relative ``ey0``/``ey1`` ordering of known text lines.

        Parses the ``plan.pdf`` fixture, looks up four labelled text lines
        and compares the ``ey0``/``ey1`` attributes found on them after
        ``analyze_pages`` has run.
        NOTE(review): ey0/ey1 look like document-wide y coordinates set by
        analyze_pages -- confirm against mensa2json.
        """
        testFn = os.path.join(TEST_DIR, 'plan.pdf')
        with open(testFn, 'rb') as testf:
            objs = mensa2json.analyze_pages(mensa2json.parsePDF(testf))
        # Materialize the filter result: on Python 3 it is a one-shot
        # iterator, so the successive searches below would each resume where
        # the previous one stopped and could miss earlier lines.
        texts = list(filter(mensa2json.accept_text,
                            objs[pdfminer.layout.LTTextLineHorizontal]))

        def first_with(label):
            # First text line containing `label`; raises StopIteration
            # when no line matches, failing the test.
            return next(o for o in texts if label in o.get_text())

        beilagenauswahl = first_with(u'Beilagenauswahl')
        pfanne = first_with(u'Pfanne')
        wok = first_with(u'Wok')
        greencorner = first_with(u'Green Corner')

        # per-page
        assert beilagenauswahl.ey0 > pfanne.ey0
        assert beilagenauswahl.ey1 > pfanne.ey1
        assert wok.ey0 > greencorner.ey0
        assert wok.ey1 > greencorner.ey1

        # Between the two pairs (presumably lines on different pages --
        # TODO confirm against the fixture).
        assert beilagenauswahl.ey0 > wok.ey0
        assert beilagenauswahl.ey0 > greencorner.ey0
        assert pfanne.ey0 > wok.ey0
        assert pfanne.ey0 > greencorner.ey0
Beispiel #4
0
    def test_analyze_pages(self):
        """Verify how analyze_pages orders selected text lines by ey0/ey1.

        Four text lines of the ``plan.pdf`` fixture are located by their
        label and their ``ey0``/``ey1`` attributes are compared pairwise.
        NOTE(review): the meaning of ey0/ey1 is not visible here; they are
        presumably y coordinates adjusted by analyze_pages -- confirm.
        """
        testFn = os.path.join(TEST_DIR, 'plan.pdf')
        with open(testFn, 'rb') as testf:
            objs = mensa2json.analyze_pages(mensa2json.parsePDF(testf))
        # A list is required here: the lookups below scan `texts` four
        # times, and a Python 3 filter() object would be partially consumed
        # by the first scan.
        texts = [o for o in objs[pdfminer.layout.LTTextLineHorizontal]
                 if mensa2json.accept_text(o)]

        labels = (u'Beilagenauswahl', u'Pfanne', u'Wok', u'Green Corner')
        # next() raises StopIteration if a label is missing from the
        # fixture, which fails the test.
        beilagenauswahl, pfanne, wok, greencorner = [
            next(o for o in texts if label in o.get_text())
            for label in labels
        ]

        # per-page
        assert beilagenauswahl.ey0 > pfanne.ey0
        assert beilagenauswahl.ey1 > pfanne.ey1
        assert wok.ey0 > greencorner.ey0
        assert wok.ey1 > greencorner.ey1

        # Comparisons between the two pairs above (presumably lines on
        # different pages -- TODO confirm against the fixture).
        assert beilagenauswahl.ey0 > wok.ey0
        assert beilagenauswahl.ey0 > greencorner.ey0
        assert pfanne.ey0 > wok.ey0
        assert pfanne.ey0 > greencorner.ey0