Esempio n. 1
0
    def testGetTextsFromCulinaria(self):
        from TextsCrawler import Parser

        pars = Parser()
        texts = pars.getTextsFromCulinaria("http://feeds2.feedburner.com/CulinariaEReceitas")
        for text in texts:
            print "---------------------------------------------------"
            print "title:", text
            print "text:", texts[text]
Esempio n. 2
0
    def testGetTextsFromArtigosCom(self):
        from TextsCrawler import Parser

        pars = Parser()
        texts = pars.getTextsFromArtigosCom("http://www.artigos.com/option,com_mtree/task,rss/type,new/Itemid,61/")
        for text in texts:
            print "---------------------------------------------------"
            print "title:", text
            print "text:", texts[text]
Esempio n. 3
0
    def testGetTextWebArtigo(self):
        from TextsCrawler import Parser

        pars = Parser()
        texts = pars.getTextWebArtigo("http://www.webartigos.com/articlerss/cat/17/")
        for text in texts:
            print "---------------------------------------------------"
            print "title:", text
            print "text:", texts[text]
Esempio n. 4
0
    def testGetHTML(self):
        # Verifies that Parser._getHTML, given http://perdu.com, returns exactly
        # the markup below (including the trailing newline).
        # NOTE(review): presumably depends on the live page staying unchanged -- confirm.
        from TextsCrawler import Parser

        fetched_page = Parser()._getHTML("http://perdu.com")
        expected_page = "<html><head><title>Vous Etes Perdu ?</title></head><body><h1>Perdu sur l'Internet ?</h1><h2>Pas de panique, on va vous aider</h2><strong><pre>    * <----- vous &ecirc;tes ici</pre></strong></body></html>\n"

        self.assertEqual(fetched_page, expected_page)
Esempio n. 5
0
    def testGetTextsFromCrondia(self):
        from TextsCrawler import Parser

        pars = Parser()
        texts = pars.getTextsFromCrondia("http://crondia.blogspot.com/feeds/posts/default?alt=rss")
        for text in texts:
            print "---------------------------------------------------"
            print "title:", text
            print "text:", texts[text]
Esempio n. 6
0
    def testGetTextsFromUOL(self):
        from TextsCrawler import Parser

        pars = Parser()
        texts = pars.getTextsFromUOL("http://feeds.folha.uol.com.br/esporte/rss091.xml")
        for text in texts:
            print "---------------------------------------------------"
            print "title:", text
            print "text:", texts[text]
Esempio n. 7
0
    def testGetTextsFromGlobo(self):
        from TextsCrawler import Parser

        pars = Parser()
        texts = pars.getTextsFromGlobo("http://oglobo.globo.com/rss/plantao.xml")
        for text in texts:
            print "---------------------------------------------------"
            print "title:", text
            print "text:", texts[text]
Esempio n. 8
0
    def testGetTextsHoroscopo(self):
        from TextsCrawler import Parser

        pars = Parser()
        texts = pars.getTextsHoroscopo("http://www.jornaldelondrina.com.br/horoscopo/")
        for text in texts:
            print "---------------------------------------------------"
            print "title:", text
            print "text:", texts[text]
Esempio n. 9
0
    def testGetTextsArtigonal(self):
        from TextsCrawler import Parser

        pars = Parser()
        texts = pars.getTextsFromArtigonal("http://www.artigonal.com/rss/")
        for text in texts:
            print "---------------------------------------------------"
            print "title:", text
            print "text:", texts[text]
Esempio n. 10
0
    def testGetTextsFromartigosEtc(self):
        from TextsCrawler import Parser

        pars = Parser()
        texts = pars.getTextsFromArtigosEtc("http://www.artigos.etc.br/feed")
        for text in texts:
            print "---------------------------------------------------"
            print "title:", text
            print "text:", texts[text]
Esempio n. 11
0
    def testhtml2text(self):
        # Smoke test: feeds the numeric character reference "&#156;" to
        # Parser._html2text and ignores the return value, so the test passes
        # as long as the call does not raise.
        from TextsCrawler import Parser

        converter = Parser()
        converter._html2text("&#156;")
Esempio n. 12
0
    def testGetLinks(self):
        # Verifies that Parser._getLinks, given the Atom 1.0 sample feed URL,
        # returns a mapping with exactly one pair: entry title -> entry link.
        # NOTE(review): presumably performs a live HTTP fetch -- confirm in Parser.
        from TextsCrawler import Parser

        sample_feed = "http://feedparser.org/docs/examples/atom10.xml"
        found_links = Parser()._getLinks(sample_feed)
        expected_links = {u"First entry title": u"http://example.org/entry/3"}

        self.assertEqual(found_links, expected_links)