コード例 #1
0
    def test_parse(self):
        """Check the fields extracted from elements of the first HTML fixture.

        The first two elements returned by ``split_html(self.test1_html)`` are
        checked for their meta tuple, title, section and body text. The last
        element is only checked for a mailto anchor in its body.
        """
        elems = split_html(self.test1_html)
        # Both fully-checked elements are expected to carry this date.
        published = datetime.datetime(2013, 4, 2)

        first = elems[0]
        self.assertEqual(get_meta(first), ("Der Standard", published, 1))
        self.assertEqual(get_title(first), u'SP und VP k\xf6nnten dritte Partei f\xfcr Koalition brauchen')
        self.assertEqual(get_section(first), u'SEITE 1')
        body = get_body(first)
        self.assertTrue(body.startswith(u'Wien - SP\xd6 und \xd6VP'))
        self.assertTrue(body.endswith("hoffen. (red) Seite 7"))
        # Paragraphs are counted as chunks separated by a blank line.
        self.assertEqual(len(body.split("\n\n")), 3)

        second = elems[1]
        self.assertEqual(get_meta(second), ("Wiener Zeitung", published, 3))
        self.assertEqual(get_title(second), u'Politique autrichienne als Vorbild')
        self.assertEqual(get_section(second), 'Europa@welt')
        body = get_body(second)
        self.assertTrue(body.startswith(u'Frankreichs Botschafter'))
        self.assertTrue(body.endswith("Treffen im Oktober 2012. epa"))
        self.assertEqual(len(body.split("\n\n")), 28)

        # assertIn reports both operands on failure, unlike assertTrue(x in y).
        # NOTE(review): "[email protected]" looks like an obfuscation
        # placeholder rather than a real address -- confirm against the fixture.
        body = get_body(elems[-1])
        self.assertIn('<a href="mailto:[email protected]">[email protected]</a>', body)
コード例 #2
0
ファイル: test_defacto_student.py プロジェクト: BBie/amcat
    def test_parse(self):
        """Check the fields extracted from elements of the first HTML fixture.

        The first two elements returned by ``split_html(self.test1_html)`` are
        checked for their meta tuple, headline, section and body text. The last
        element is only checked for a mailto anchor in its body.
        """
        elems = split_html(self.test1_html)
        # Both fully-checked elements are expected to carry this date.
        published = datetime.datetime(2013, 4, 2)

        first = elems[0]
        self.assertEqual(get_meta(first), ("Der Standard", published, 1))
        self.assertEqual(get_headline(first), u'SP und VP k\xf6nnten dritte Partei f\xfcr Koalition brauchen')
        self.assertEqual(get_section(first), u'SEITE 1')
        body = get_body(first)
        self.assertTrue(body.startswith(u'Wien - SP\xd6 und \xd6VP'))
        self.assertTrue(body.endswith("hoffen. (red) Seite 7"))
        # Paragraphs are counted as chunks separated by a blank line.
        self.assertEqual(len(body.split("\n\n")), 3)

        second = elems[1]
        self.assertEqual(get_meta(second), ("Wiener Zeitung", published, 3))
        self.assertEqual(get_headline(second), u'Politique autrichienne als Vorbild')
        self.assertEqual(get_section(second), 'Europa@welt')
        body = get_body(second)
        self.assertTrue(body.startswith(u'Frankreichs Botschafter'))
        self.assertTrue(body.endswith("Treffen im Oktober 2012. epa"))
        self.assertEqual(len(body.split("\n\n")), 28)

        # assertIn reports both operands on failure, unlike assertTrue(x in y).
        # NOTE(review): "[email protected]" looks like an obfuscation
        # placeholder rather than a real address -- confirm against the fixture.
        body = get_body(elems[-1])
        self.assertIn('<a href="mailto:[email protected]">[email protected]</a>', body)
コード例 #3
0
 def test_articles(self):
     """Build articles from both fixtures; check the last one of the second."""
     # The first fixture's articles are not inspected; the loop still runs
     # get_article over every element that split_html returns for it.
     for element in split_html(self.test1_html):
         get_article(element)

     second_fixture_articles = []
     for element in split_html(self.test2_html):
         second_fixture_articles.append(get_article(element))

     last_article = second_fixture_articles[-1]
     expected_title = 'Cafe Puls News 08:00 (08:00) - Peter Kaiser wird angelobt'
     expected_date = datetime.datetime(2013, 4, 2, 8, 0)
     self.assertEqual(last_article.title, expected_title)
     self.assertEqual(last_article.date, expected_date)
コード例 #4
0
 def test_split(self):
     """The first HTML fixture is expected to split into 21 elements."""
     element_count = len(split_html(self.test1_html))
     self.assertEqual(element_count, 21)
コード例 #5
0
ファイル: test_defacto_student.py プロジェクト: BBie/amcat
 def test_articles(self):
     """Build articles from both fixtures; check the last one of the second."""
     # The first fixture's articles are not inspected; the loop still runs
     # get_article over every element that split_html returns for it.
     for element in split_html(self.test1_html):
         get_article(element)

     second_fixture_articles = []
     for element in split_html(self.test2_html):
         second_fixture_articles.append(get_article(element))

     last_article = second_fixture_articles[-1]
     expected_headline = 'Cafe Puls News 08:00 (08:00) - Peter Kaiser wird angelobt'
     expected_date = datetime.datetime(2013, 4, 2, 8, 0)
     self.assertEqual(last_article.headline, expected_headline)
     self.assertEqual(last_article.date, expected_date)
コード例 #6
0
ファイル: test_defacto_student.py プロジェクト: BBie/amcat
 def test_split(self):
     """The first HTML fixture is expected to split into 21 elements."""
     element_count = len(split_html(self.test1_html))
     self.assertEqual(element_count, 21)