Exemple #1
0
 def test_spiderFeedUpdatedEntries(self):
     # Spider test feed '4' and inspect what landed in the work directory.
     config.load(configfile)
     self.spiderFeed(testfeed % '4')

     # exactly two items are expected in the work directory
     cached = glob.glob(workdir+"/*")
     self.assertEqual(2, len(cached))

     # the first entry of the cached document must carry the text 'three'
     cached_path = workdir + '/planet.intertwingly.net,2006,testfeed4'
     parsed = feedparser.parse(cached_path)
     first_entry = parsed.entries[0]
     self.assertEqual(u'three', first_entry.content[0].value)
    def test_apply_filter(self):
        # Apply the 'filter' configuration to the prepared feed data.
        config.load(configfile % 'filter')
        splice.apply(self.feeddata)

        # verify that the generated index.html references the stylesheet;
        # the context manager closes the file even when read() fails
        with open(os.path.join(workdir, 'index.html')) as index_file:
            html = index_file.read()
        self.assertTrue(' href="http://example.com/default.css"' in html)
Exemple #3
0
 def test_django_config_context(self):
     # Render config.html.dj through dj.run() and compare the output.
     config.load('tests/data/filter/django/test.ini')

     # read the feed inside a context manager so the handle is released
     # even if read() raises
     with open('tests/data/filter/django/test.xml') as feed:
         source = feed.read()

     template = os.path.realpath('tests/data/filter/django/config.html.dj')
     results = dj.run(template, source)
     self.assertEqual(results, "Django on Venus\n")
 def test_django_entry_title(self):
     # Render title.html.dj through dj.run() and compare the output.
     config.load("tests/data/filter/django/test.ini")

     # context manager guarantees the feed file is closed on any outcome
     with open("tests/data/filter/django/test.xml") as feed:
         source = feed.read()

     template = os.path.realpath("tests/data/filter/django/title.html.dj")
     results = dj.run(template, source)
     self.assertEqual(results, u"\xa1Atom-Powered <b>Robots</b> Run Amok!\n")
 def test_invalid_default_license(self):
     # Splice and apply once per test feed, then check the license link.
     config.load(configfile)
     for _ in testfeeds:
         doc = splice.splice()
         splice.apply(doc.toxml('utf-8'))

     # read index.html with a context manager so the handle is not leaked
     with open(os.path.join(workdir, 'index.html')) as index_file:
         output = index_file.read()
     self.assertTrue('title="License information">License</a>' in output)
Exemple #6
0
    def test_apply_filter_mememe(self):
        # Load the 'mememe' configuration, then run self.apply_fancy().
        config.load(configfile % 'mememe')
        self.apply_fancy()

        # the rendered page must contain the meme sidebar heading
        sidebar = '<div class="sidebar"><h2>Memes <a href="memes.atom">'
        index_path = os.path.join(workdir, 'index.html')
        with open(index_path) as html:
            page = html.read()
        self.assertTrue(page.find(sidebar) >= 0)
 def test_django_entry_title(self):
     # Render title.html.dj through dj.run() and compare the output.
     config.load('tests/data/filter/django/test.ini')

     # the with-block closes the feed file even if read() raises
     with open('tests/data/filter/django/test.xml') as feed:
         source = feed.read()

     template = os.path.realpath('tests/data/filter/django/title.html.dj')
     results = dj.run(template, source)
     self.assertEqual(results, u"\xa1Atom-Powered Robots Run Amok!\n")
Exemple #8
0
 def test_spiderFeed_retroactive_filter(self):
     # Spider the same feed before and after setting the Planet filter.
     pattern = workdir+"/*"
     feed_uri = testfeed % '1b'

     config.load(configfile)
     self.spiderFeed(feed_uri)
     before = glob.glob(pattern)
     self.assertEqual(5, len(before))

     # with the filter set to 'two', a second pass must leave one item
     config.parser.set('Planet', 'filter', 'two')
     self.spiderFeed(feed_uri)
     after = glob.glob(pattern)
     self.assertEqual(1, len(after))
 def test_embedded_license(self):
     # Splice and apply once per test feed, then look for the license text.
     config.load(configfile)
     for _ in testfeeds:
         doc = splice.splice()
         splice.apply(doc.toxml('utf-8'))

     # context manager prevents the index.html handle from leaking
     with open(os.path.join(workdir, 'index.html')) as index_file:
         output = index_file.read()
     self.assertTrue('Attribution 3.0 Unported' in output)
 def test_django_config_context(self):
     # Render config.html.dj through dj.run() and compare the output.
     config.load('tests/data/filter/django/test.ini')

     # closing is handled by the with-block, including on read errors
     with open('tests/data/filter/django/test.xml') as feed:
         source = feed.read()

     template = os.path.realpath('tests/data/filter/django/config.html.dj')
     results = dj.run(template, source)
     self.assertEqual(results, "Django on Venus\n")
 def test_django_entry_title_autoescape_off(self):
     # Render title.html.dj with the django_autoescape option set to off.
     config.load("tests/data/filter/django/test.ini")
     config.parser.set("Planet", "django_autoescape", "off")

     # the with-block closes the feed file even if read() raises
     with open("tests/data/filter/django/test.xml") as feed:
         source = feed.read()

     template = os.path.realpath("tests/data/filter/django/title.html.dj")
     results = dj.run(template, source)
     self.assertEqual(results, u"\xa1Atom-Powered <b>Robots</b> Run Amok!\n")
 def test_online_accounts(self):
     # Subscriptions from the FOAF config, compared after in-place sort.
     config.load('tests/data/config/foaf.ini')

     expected = [
         'http://api.flickr.com/services/feeds/' +
         'photos_public.gne?id=77366516@N00',
         'http://del.icio.us/rss/eliast',
         'http://torrez.us/feed/rdf',
     ]

     subscribed = config.subscriptions()
     subscribed.sort()
     self.assertEqual(expected, subscribed)
Exemple #13
0
 def test_django_entry_title_autoescape_off(self):
     # Render title.html.dj with the django_autoescape option set to off.
     config.load('tests/data/filter/django/test.ini')
     config.parser.set('Planet', 'django_autoescape', 'off')

     # read through a context manager so the file is always closed
     with open('tests/data/filter/django/test.xml') as feed:
         source = feed.read()

     template = os.path.realpath('tests/data/filter/django/title.html.dj')
     results = dj.run(template, source)
     self.assertEqual(results, u"\xa1Atom-Powered <b>Robots</b> Run Amok!\n")
Exemple #14
0
    def test_apply_filter(self):
        # Apply the 'filter' configuration to the prepared feed data.
        config.load(configfile % 'filter')
        splice.apply(self.feeddata)

        # verify that the generated index.html references the stylesheet;
        # the file is read inside a with-block so it is always closed
        with open(os.path.join(workdir, 'index.html')) as index_file:
            html = index_file.read()
        self.assertTrue(
            html.find(' href="http://example.com/default.css"') >= 0)
 def test_django_entry_title_autoescape_off(self):
     # Render title.html.dj with the django_autoescape option set to off.
     config.load('tests/data/filter/django/test.ini')
     config.parser.set('Planet', 'django_autoescape', 'off')

     # context manager releases the file handle even on a failed read
     with open('tests/data/filter/django/test.xml') as feed:
         source = feed.read()

     template = os.path.realpath('tests/data/filter/django/title.html.dj')
     results = dj.run(template, source)
     self.assertEqual(results, u"\xa1Atom-Powered <b>Robots</b> Run Amok!\n")
Exemple #16
0
 def test_django_entry_title(self):
     # Render title.html.dj through dj.run() and compare the output.
     config.load('tests/data/filter/django/test.ini')

     # the with-block closes the feed file even if read() raises
     with open('tests/data/filter/django/test.xml') as feed:
         source = feed.read()

     template = os.path.realpath('tests/data/filter/django/title.html.dj')
     results = dj.run(template, source)
     self.assertEqual(results, "\xc2\xa1Atom-Powered Robots Run Amok!\n")
    def test_xslt_filter(self):
        # Run the first configured filter over category-one.xml.
        config.load('tests/data/filter/translate.ini')
        testfile = 'tests/data/filter/category-one.xml'

        # read the fixture inside a with-block so the handle is closed
        with open(testfile) as fp:
            source = fp.read()
        output = shell.run(config.filters()[0], source, mode="filter")

        # the first category's term attribute must come back as 'OnE'
        dom = xml.dom.minidom.parseString(output)
        catterm = dom.getElementsByTagName('category')[0].getAttribute('term')
        self.assertEqual('OnE', catterm)
Exemple #18
0
    def test_apply_filter_html(self):
        # Load the 'html' configuration, then run self.apply_asf().
        config.load(configfile % 'html')
        self.apply_asf()

        # index.html must contain at least one '/>' sequence
        with open(os.path.join(workdir, 'index.html')) as fp:
            output = fp.read()
        self.assertTrue(output.find('/>') >= 0)

        # index.html4 must not contain any '/>' sequence
        with open(os.path.join(workdir, 'index.html4')) as fp:
            output = fp.read()
        self.assertTrue(output.find('/>') < 0)
Exemple #19
0
 def test_online_accounts(self):
     # Subscriptions from the FOAF config, compared after in-place sort.
     config.load('tests/data/config/foaf.ini')

     flickr = ('http://api.flickr.com/services/feeds/' +
               'photos_public.gne?id=77366516@N00')
     delicious = 'http://del.icio.us/rss/eliast'
     torrez = 'http://torrez.us/feed/rdf'

     found = config.subscriptions()
     found.sort()
     self.assertEqual([flickr, delicious, torrez], found)
    def test_xslt_filter(self):
        # Run the first configured filter over category-one.xml.
        config.load('tests/data/filter/translate.ini')
        testfile = 'tests/data/filter/category-one.xml'

        # the with-block closes the fixture file even if read() raises
        with open(testfile) as fp:
            source = fp.read()
        output = shell.run(config.filters()[0], source, mode="filter")

        # the first category's term attribute must come back as 'OnE'
        dom = xml.dom.minidom.parseString(output)
        first_category = dom.getElementsByTagName('category')[0]
        self.assertEqual('OnE', first_category.getAttribute('term'))
Exemple #21
0
    def test_apply_filter_html(self):
        # Load the 'html' configuration, then run self.apply_asf().
        config.load(configfile % 'html')
        self.apply_asf()

        marker = '/>'

        # index.html must contain the marker
        xhtml_path = os.path.join(workdir, 'index.html')
        with open(xhtml_path) as html:
            position = html.read().find(marker)
            self.assertTrue(position >= 0)

        # index.html4 must not contain the marker
        html4_path = os.path.join(workdir, 'index.html4')
        with open(html4_path) as html:
            position = html.read().find(marker)
            self.assertTrue(position < 0)
    def test_apply_filter_html(self):
        # Load the 'html' configuration, then run self.apply_asf().
        config.load(configfile % 'html')
        self.apply_asf()

        # index.html must contain at least one '/>' sequence; reading inside
        # a with-block keeps the file handle from leaking
        with open(os.path.join(workdir, 'index.html')) as fp:
            output = fp.read()
        self.assertTrue(output.find('/>')>=0)

        # index.html4 must not contain any '/>' sequence
        with open(os.path.join(workdir, 'index.html4')) as fp:
            output = fp.read()
        self.assertTrue(output.find('/>')<0)
 def test_recursive(self):
     # Subscriptions from the foaf-deep config, compared after sorting.
     config.load('tests/data/config/foaf-deep.ini')

     expected = [
         'http://api.flickr.com/services/feeds/photos_public.gne?id=77366516@N00',
         'http://del.icio.us/rss/eliast',
         'http://del.icio.us/rss/leef',
         'http://del.icio.us/rss/rubys',
         'http://intertwingly.net/blog/atom.xml',
         'http://thefigtrees.net/lee/life/atom.xml',
         'http://torrez.us/feed/rdf',
     ]

     subscribed = config.subscriptions()
     subscribed.sort()
     self.assertEqual(expected, subscribed)
 def test_default_license_lookup(self):
     # Splice and apply once per test feed, then look for the license name.
     config.load(configfile)
     expected_text = 'Reconocimiento-No comercial-Compartir bajo la' + \
         ' misma licencia 2.5 Colombia'
     for _ in testfeeds:
         doc = splice.splice()
         splice.apply(doc.toxml('utf-8'))

     # read index.html via a context manager so the handle is not leaked
     with open(os.path.join(workdir, 'index.html')) as index_file:
         output = index_file.read()
     self.assertTrue(expected_text in output)
Exemple #25
0
    def test_stripAd_yahoo(self):
        # Pipe the fixture through every configured filter in order.
        testfile = 'tests/data/filter/stripAd-yahoo.xml'
        config.load('tests/data/filter/stripAd-yahoo.ini')

        # the with-block closes the fixture file even if read() raises
        with open(testfile) as fp:
            output = fp.read()
        for each_filter in config.filters():
            output = shell.run(each_filter, output, mode="filter")

        # the text nested in the first content element must be 'before--after'
        dom = xml.dom.minidom.parseString(output)
        excerpt = dom.getElementsByTagName('content')[0]
        self.assertEqual(u'before--after',
                         excerpt.firstChild.firstChild.nodeValue)
    def test_stripAd_yahoo(self):
        # Pipe the fixture through every configured filter in order.
        testfile = 'tests/data/filter/stripAd-yahoo.xml'
        config.load('tests/data/filter/stripAd-yahoo.ini')

        # read inside a with-block so the fixture handle is always released
        with open(testfile) as fp:
            output = fp.read()
        for each_filter in config.filters():
            output = shell.run(each_filter, output, mode="filter")

        # the text nested in the first content element must be 'before--after'
        dom = xml.dom.minidom.parseString(output)
        content = dom.getElementsByTagName('content')[0]
        self.assertEqual(u'before--after',
            content.firstChild.firstChild.nodeValue)
Exemple #27
0
 def test_recursive(self):
     # Subscriptions from the foaf-deep config, compared after sorting.
     config.load('tests/data/config/foaf-deep.ini')

     found = config.subscriptions()
     found.sort()

     wanted = [
         'http://api.flickr.com/services/feeds/photos_public.gne?id=77366516@N00',
         'http://del.icio.us/rss/eliast',
         'http://del.icio.us/rss/leef',
         'http://del.icio.us/rss/rubys',
         'http://intertwingly.net/blog/atom.xml',
         'http://thefigtrees.net/lee/life/atom.xml',
         'http://torrez.us/feed/rdf',
     ]
     self.assertEqual(wanted, found)
Exemple #28
0
 def test_multiple_subscriptions(self):
     # Two reading lists are configured; together they yield five feeds.
     config.load('tests/data/config/foaf-multiple.ini')
     reading_lists = config.reading_lists()
     self.assertEqual(2, len(reading_lists))

     subscribed = config.subscriptions()
     subscribed.sort()
     self.assertEqual(5, len(subscribed))

     flickr_base = 'http://api.flickr.com/services/feeds/'
     expected = [
         flickr_base + 'photos_public.gne?id=77366516@N00',
         flickr_base + 'photos_public.gne?id=SOMEID',
         'http://del.icio.us/rss/SOMEID',
         'http://del.icio.us/rss/eliast',
         'http://torrez.us/feed/rdf',
     ]
     self.assertEqual(expected, subscribed)
 def test_multiple_subscriptions(self):
     # Two reading lists are configured; together they yield five feeds.
     config.load('tests/data/config/foaf-multiple.ini')
     list_count = len(config.reading_lists())
     self.assertEqual(2, list_count)

     found = config.subscriptions()
     found.sort()
     self.assertEqual(5, len(found))

     wanted = [
         'http://api.flickr.com/services/feeds/' +
         'photos_public.gne?id=77366516@N00',
         'http://api.flickr.com/services/feeds/' +
         'photos_public.gne?id=SOMEID',
         'http://del.icio.us/rss/SOMEID',
         'http://del.icio.us/rss/eliast',
         'http://torrez.us/feed/rdf',
     ]
     self.assertEqual(wanted, found)
    def setUp(self):
        """Spider the test feeds and make sure a work directory exists.

        Publishes the spidered file names through the module-level
        ``testfeeds`` global.
        """
        config.load(configfile)

        # Re fetch and/or parse the test feeds
        spider.spiderPlanet()
        global testfeeds
        testfeeds = glob.glob(spider_workdir+"/*")

        try:
            os.makedirs(workdir)
        except OSError:
            # only filesystem failures (typically a leftover directory from
            # an earlier run) trigger the clean-up-and-retry path; any other
            # exception now propagates instead of being masked
            self.tearDown()
            os.makedirs(workdir)
    def test_excerpt_lorem_ipsum(self):
        # Pipe the fixture through every configured filter in order.
        testfile = 'tests/data/filter/excerpt-lorem-ipsum.xml'
        config.load('tests/data/filter/excerpt-lorem-ipsum.ini')

        # the with-block closes the fixture file even if read() raises
        with open(testfile) as fp:
            output = fp.read()
        for each_filter in config.filters():
            output = shell.run(each_filter, output, mode="filter")

        # the first planet:excerpt element must hold the shortened text
        dom = xml.dom.minidom.parseString(output)
        excerpt = dom.getElementsByTagName('planet:excerpt')[0]
        self.assertEqual(u'Lorem ipsum dolor sit amet, consectetuer ' +
            u'adipiscing elit. Nullam velit. Vivamus tincidunt, erat ' +
            u'in \u2026', excerpt.firstChild.firstChild.nodeValue)
Exemple #32
0
    def test_expunge(self):
        # Populate the cache from fixtures, expunge, and check what is left.
        config.load(configfile)

        # create test entries in cache with correct timestamp
        for entry in glob.glob(testentries):
            e = minidom.parse(entry)
            e.normalize()
            eid = e.getElementsByTagName('id')
            updated = e.getElementsByTagName('updated')
            # skip incomplete documents *before* indexing into the node
            # lists; checking afterwards would raise IndexError instead
            if not eid or not updated: continue
            eupdated = updated[0].childNodes[0].nodeValue
            if not eupdated: continue
            efile = filename(workdir, eid[0].childNodes[0].nodeValue)
            emtime = time.mktime(feedparser._parse_date_w3dtf(eupdated))
            shutil.copyfile(entry, efile)
            # give the cached copy the entry's own updated time as its mtime
            os.utime(efile, (emtime, emtime))

        # create test feeds in cache
        sources = config.cache_sources_directory()
        for feed in glob.glob(testfeeds):
            f = minidom.parse(feed)
            f.normalize()
            fid = f.getElementsByTagName('id')
            if not fid: continue
            ffile = filename(sources, fid[0].childNodes[0].nodeValue)
            shutil.copyfile(feed, ffile)

        # verify that exactly nine entries + one source dir were produced
        files = glob.glob(workdir+"/*")
        self.assertEqual(10, len(files))

        # verify that exactly four feeds were produced in source dir
        files = glob.glob(sources+"/*")
        self.assertEqual(4, len(files))

        # expunge...
        expungeCache()

        # verify that five entries and one source dir are left
        files = glob.glob(workdir+"/*")
        self.assertEqual(6, len(files))

        # verify that the right five entries are left
        self.assertTrue(os.path.join(workdir,
            'bzr.mfd-consult.dk,2007,venus-expunge-test1,1') in files)
        self.assertTrue(os.path.join(workdir,
            'bzr.mfd-consult.dk,2007,venus-expunge-test2,1') in files)
        self.assertTrue(os.path.join(workdir,
            'bzr.mfd-consult.dk,2007,venus-expunge-test3,3') in files)
        self.assertTrue(os.path.join(workdir,
            'bzr.mfd-consult.dk,2007,venus-expunge-test4,2') in files)
        self.assertTrue(os.path.join(workdir,
            'bzr.mfd-consult.dk,2007,venus-expunge-test4,3') in files)
    def test_expunge(self):
        # Populate the cache from fixtures, expunge, and check what is left.
        config.load(configfile)

        # create test entries in cache with correct timestamp
        for entry in glob.glob(testentries):
            e = minidom.parse(entry)
            e.normalize()
            eid = e.getElementsByTagName('id')
            updated = e.getElementsByTagName('updated')
            # the guard has to run before eid[0] / updated[0] are touched,
            # otherwise a document without those elements raises IndexError
            if not eid or not updated: continue
            eupdated = updated[0].childNodes[0].nodeValue
            if not eupdated: continue
            efile = filename(workdir, eid[0].childNodes[0].nodeValue)
            emtime = time.mktime(feedparser._parse_date_w3dtf(eupdated))
            shutil.copyfile(entry, efile)
            # give the cached copy the entry's own updated time as its mtime
            os.utime(efile, (emtime, emtime))

        # create test feeds in cache
        sources = config.cache_sources_directory()
        for feed in glob.glob(testfeeds):
            f = minidom.parse(feed)
            f.normalize()
            fid = f.getElementsByTagName('id')
            if not fid: continue
            ffile = filename(sources, fid[0].childNodes[0].nodeValue)
            shutil.copyfile(feed, ffile)

        # verify that exactly nine entries + one source dir were produced
        files = glob.glob(workdir+"/*")
        self.assertEqual(10, len(files))

        # verify that exactly four feeds were produced in source dir
        files = glob.glob(sources+"/*")
        self.assertEqual(4, len(files))

        # expunge...
        expungeCache()

        # verify that five entries and one source dir are left
        files = glob.glob(workdir+"/*")
        self.assertEqual(6, len(files))

        # verify that the right five entries are left
        survivors = [
            'bzr.mfd-consult.dk,2007,venus-expunge-test1,1',
            'bzr.mfd-consult.dk,2007,venus-expunge-test2,1',
            'bzr.mfd-consult.dk,2007,venus-expunge-test3,3',
            'bzr.mfd-consult.dk,2007,venus-expunge-test4,2',
            'bzr.mfd-consult.dk,2007,venus-expunge-test4,3',
        ]
        for survivor in survivors:
            self.assertTrue(os.path.join(workdir, survivor) in files)
Exemple #34
0
    def test_excerpt_lorem_ipsum(self):
        # Pipe the fixture through every configured filter in order.
        testfile = 'tests/data/filter/excerpt-lorem-ipsum.xml'
        config.load('tests/data/filter/excerpt-lorem-ipsum.ini')

        # read inside a with-block so the fixture handle is always released
        with open(testfile) as fp:
            output = fp.read()
        for each_filter in config.filters():
            output = shell.run(each_filter, output, mode="filter")

        # the first planet:excerpt element must hold the shortened text
        dom = xml.dom.minidom.parseString(output)
        excerpt = dom.getElementsByTagName('planet:excerpt')[0]
        self.assertEqual(
            u'Lorem ipsum dolor sit amet, consectetuer ' +
            u'adipiscing elit. Nullam velit. Vivamus tincidunt, erat ' +
            u'in \u2026', excerpt.firstChild.firstChild.nodeValue)
    def test_coerce_rss(self):
        # Entry dates must be identical after spidering a second RSS version.
        config.load(configfile)

        no_date_id = "fake.url.example.com,rss-no-date"
        changing_id = "fake.url.example.com,rss-changing-date"
        changing_date = u"2011-12-01T11:00:00Z"

        # load first version of RSS and record the date given to the
        # entry that has none of its own
        self.spiderFeed(testfeed % "a-rss-1")
        rss_no_date_expected = self.verify_date(no_date_id)
        self.verify_date(changing_id, changing_date)

        # parse updated RSS feed
        self.spiderFeed(testfeed % "a-rss-2")

        # verify dates haven't changed
        self.verify_date(no_date_id, rss_no_date_expected)
        self.verify_date(changing_id, changing_date)
Exemple #36
0
    def test_apply_fancy(self):
        # Apply the 'fancy' configuration to the prepared feed data.
        config.load(configfile % 'fancy')
        splice.apply(self.feeddata)

        # verify that selected files are there and are not empty
        for name in ['index.html', 'planet.css', 'images/jdub.png']:
            path = os.path.join(workdir, name)
            self.assertTrue(os.path.exists(path), path)
            self.assertTrue(os.stat(path).st_size > 0)

        # verify that index.html carries the planet title and an entry link;
        # the with-block closes the file even if read() raises
        with open(os.path.join(workdir, 'index.html')) as index_file:
            html = index_file.read()
        self.assertTrue(html.find('<h1>test planet</h1>')>=0)
        self.assertTrue(html.find(
          '<h4><a href="http://example.com/2">Venus</a></h4>')>=0)
Exemple #37
0
    def test_excerpt_lorem_ipsum_summary(self):
        # Same filter chain, with excerpt.py's target set to atom:summary.
        fixture = 'tests/data/filter/excerpt-lorem-ipsum.xml'
        config.load('tests/data/filter/excerpt-lorem-ipsum.ini')
        config.parser.set('excerpt.py', 'target', 'atom:summary')

        with open(fixture) as handle:
            document = handle.read()
        for stage in config.filters():
            document = shell.run(stage, document, mode="filter")

        expected = (
            u'Lorem ipsum dolor sit amet, consectetuer ' +
            u'adipiscing elit. Nullam velit. Vivamus tincidunt, erat ' +
            u'in \u2026')

        # the shortened text is expected in the first summary element
        tree = xml.dom.minidom.parseString(document)
        summary = tree.getElementsByTagName('summary')[0]
        self.assertEqual(expected, summary.firstChild.firstChild.nodeValue)
    def test_regexp_filter2(self):
        # category-one.xml must survive the filters; category-two.xml not.
        config.load('tests/data/filter/regexp-sifter2.ini')

        def run_filters(testfile):
            # feed the file through every configured filter, in order; the
            # with-block makes sure the fixture handle is closed
            with open(testfile) as fp:
                output = fp.read()
            for each_filter in config.filters():
                output = shell.run(each_filter, output, mode="filter")
            return output

        output = run_filters('tests/data/filter/category-one.xml')
        self.assertNotEqual('', output)

        output = run_filters('tests/data/filter/category-two.xml')
        self.assertEqual('', output)
Exemple #39
0
    def test_regexp_filter2(self):
        # category-one.xml must survive the filters; category-two.xml not.
        config.load('tests/data/filter/regexp-sifter2.ini')

        testfile = 'tests/data/filter/category-one.xml'

        # with-blocks replace open().read() so neither handle is leaked
        with open(testfile) as fp:
            output = fp.read()
        for each_filter in config.filters():
            output = shell.run(each_filter, output, mode="filter")

        self.assertNotEqual('', output)

        testfile = 'tests/data/filter/category-two.xml'

        with open(testfile) as fp:
            output = fp.read()
        for each_filter in config.filters():
            output = shell.run(each_filter, output, mode="filter")

        self.assertEqual('', output)
Exemple #40
0
    def test_spiderThreads(self):
        # Spider the planet against a local HTTP server running in a
        # background thread, then check the request statuses it logged.
        config.load(configfile.replace('config', 'threaded'))
        _PORT = config.parser.getint('Planet', 'test_port')

        # shared between the request handler below and this test body
        log = []
        from SimpleHTTPServer import SimpleHTTPRequestHandler

        class TestRequestHandler(SimpleHTTPRequestHandler):
            # collect the arguments of every log_message() call in `log`
            def log_message(self, format_, *args):
                log.append(args)

        from threading import Thread

        class TestServerThread(Thread):
            def __init__(self):
                # ready: set once the server object exists
                # done: set by the test to ask the serving loop to stop
                self.ready = 0
                self.done = 0
                Thread.__init__(self)

            def run(self):
                from BaseHTTPServer import HTTPServer
                httpd_ = HTTPServer(('', _PORT), TestRequestHandler)
                self.ready = 1
                # serve one request at a time until asked to stop
                while not self.done:
                    httpd_.handle_request()

        httpd = TestServerThread()
        httpd.start()
        # wait until the server thread reports that it is listening
        while not httpd.ready:
            time.sleep(0.1)

        try:
            spiderPlanet()
        finally:
            httpd.done = 1
            # presumably this final request lets a pending handle_request()
            # return so that the loop can notice `done`
            import urllib
            urllib.urlopen('http://127.0.0.1:%d/' % _PORT).read()

        # for each log record whose first argument starts with 'GET ', keep
        # the second argument as an int; the assertion treats these as
        # HTTP status codes
        status = [int(rec[1]) for rec in log if str(rec[0]).startswith('GET ')]
        status.sort()
        self.assertEqual([200, 200, 200, 200, 404], status)

        self.verify_spiderPlanet()
    def test_coerce_atom(self):
        # Entry dates must be identical after spidering a second Atom version.
        config.load(configfile)

        no_date_id = "fake.url.example.com,atom-no-date"
        # (entry id, expected date) pairs, checked in this order both times
        pinned = (
            ("fake.url.example.com,atom-changing-published", u"2011-12-08T02:02:28Z"),
            ("fake.url.example.com,atom-changing-updated", u"2011-11-09T00:00:28Z"),
            ("fake.url.example.com,atom-update-before-pub", u"2011-11-11T11:11:11Z"),
        )

        # load first version of Atom
        self.spiderFeed(testfeed % "b-atom-1")

        atom_no_date_expected = self.verify_date(no_date_id)
        for entry_id, expected_date in pinned:
            self.verify_date(entry_id, expected_date)

        # parse updated Atom feed
        self.spiderFeed(testfeed % "b-atom-2")

        # verify dates haven't changed
        self.verify_date(no_date_id, atom_no_date_expected)
        for entry_id, expected_date in pinned:
            self.verify_date(entry_id, expected_date)
Exemple #42
0
    def eval_config(self, name):
        """Check the expectation embedded in the test case ini file *name*.

        The ini file is read as text and ``self.desc_config_re`` extracts a
        description and an expectation from it.  The same file is then
        loaded as the active configuration and the expectation is evaluated
        against ``tmpl.template_info("<feed/>")``.

        Raises RuntimeError when the file cannot be read or the pattern
        does not match.
        """
        # read the test case
        try:
            with open(testfiles % (name, 'ini')) as testcasefile:
                data = testcasefile.read()
            description, expect = self.desc_config_re.search(data).groups()
        except Exception:
            # Exception instead of a bare except, so KeyboardInterrupt and
            # SystemExit are no longer converted into a parse error
            raise RuntimeError("can't parse %s" % name)

        # map to template info
        config.load(testfiles % (name, 'ini'))
        results = tmpl.template_info("<feed/>")

        # verify the results
        # NOTE: eval() executes text taken from the test case file; only
        # acceptable because those files belong to the test suite
        simple = self.simple_re.match(expect)
        if not simple:
            self.assertTrue(eval(expect, results), expect)
        else:
            lhs, rhs = simple.groups()
            self.assertEqual(eval(rhs), eval(lhs, results))
    def eval_config(self, name):
        """Check the expectation embedded in the test case ini file *name*.

        The ini file is read as text and ``self.desc_config_re`` extracts a
        description and an expectation from it.  The same file is then
        loaded as the active configuration and the expectation is evaluated
        against ``tmpl.template_info("<feed/>")``.

        Raises RuntimeError when the file cannot be read or the pattern
        does not match.
        """
        # read the test case
        try:
            # the with-block closes the file even when the regex search
            # below fails, which the explicit close() did not
            with open(testfiles % (name,'ini')) as testcase:
                data = testcase.read()
            description, expect = self.desc_config_re.search(data).groups()
        except Exception:
            # call-style raise is valid on both Python 2 and Python 3
            raise RuntimeError("can't parse %s" % name)

        # map to template info
        config.load(testfiles % (name,'ini'))
        results = tmpl.template_info("<feed/>")

        # verify the results
        # NOTE: eval() executes text taken from the test case file; only
        # acceptable because those files belong to the test suite
        simple = self.simple_re.match(expect)
        if not simple:
            self.assertTrue(eval(expect, results), expect)
        else:
            lhs, rhs = simple.groups()
            self.assertEqual(eval(rhs), eval(lhs, results))
    def test_spiderFeed_blacklist(self):
        # An entry moved into the blacklist directory must not come back.
        config.load(configfile)
        self.spiderFeed(testfeed % '1b')

        # verify that exactly four entries were produced
        self.assertEqual(4, len(glob.glob(workdir+"/planet*")))

        # verify that the file names are as expected
        self.assertTrue(os.path.exists(os.path.join(workdir,
            'planet.intertwingly.net,2006,testfeed1,1')))

        os.mkdir(os.path.join(workdir, "blacklist"))

        # move one cached entry into the blacklist directory
        os.rename(os.path.join(workdir,
            'planet.intertwingly.net,2006,testfeed1,1'),
                  os.path.join(workdir, "blacklist",
            'planet.intertwingly.net,2006,testfeed1,1'))

        # spidering again leaves three entries (this line was indented with
        # a tab, which Python 3 rejects when mixed with spaces)
        self.spiderFeed(testfeed % '1b')
        self.assertEqual(3, len(glob.glob(workdir+"/planet*")))
    def test_index_splice(self):
        # Create the id index from the splice fixtures, alter it, re-splice.
        import test_splice
        config.load(test_splice.configfile)
        index = idindex.create()

        self.assertEqual(12, len(index))
        feed1_value = index['planet.intertwingly.net,2006,testfeed1,1']
        self.assertEqual('tag:planet.intertwingly.net,2006:testfeed1',
            feed1_value)
        feed3_value = index['planet.intertwingly.net,2006,testfeed3,1']
        self.assertEqual(
            'http://intertwingly.net/code/venus/tests/data/spider/testfeed3.rss',
            feed3_value)

        # swap the case of every stored value that mentions testfeed2
        for key in index.keys():
            value = index[key]
            if value.find('testfeed2')>0:
                index[key] = value.swapcase()
        index.close()

        from planet.splice import splice
        doc = splice()

        # element counts expected in the spliced document
        for expected, tag in ((8, 'entry'),
                              (4, 'planet:source'),
                              (12, 'planet:name')):
            self.assertEqual(expected, len(doc.getElementsByTagName(tag)))
    def test_index_spider(self):
        # Start from an empty id index, spider, then inspect the index.
        import test_spider
        config.load(test_spider.configfile)

        # a freshly created index holds no entries
        index = idindex.create()
        entry_count = len(index)
        self.assertEqual(0, entry_count)
        index.close()

        from planet.spider import spiderPlanet
        try:
            spiderPlanet()

            index = idindex.open()
            self.assertEqual(12, len(index))
            feed1_value = index['planet.intertwingly.net,2006,testfeed1,1']
            self.assertEqual('tag:planet.intertwingly.net,2006:testfeed1',
                feed1_value)
            feed3_value = index['planet.intertwingly.net,2006,testfeed3,1']
            self.assertEqual(
                'http://intertwingly.net/code/venus/tests/data/spider/testfeed3.rss',
                feed3_value)
            index.close()
        finally:
            # remove the spider work directory, then its parent directory
            import os, shutil
            spider_dir = test_spider.workdir
            shutil.rmtree(spider_dir)
            os.removedirs(os.path.split(spider_dir)[0])
Exemple #47
0
    def test_spiderThreads(self):
        # Spider the planet against a local HTTP server running in a
        # background thread, then check the request statuses it logged.
        threaded_config = configfile.replace('config','threaded')
        config.load(threaded_config)
        _PORT = config.parser.getint('Planet','test_port')

        # filled by the request handler, read back after spidering
        log = []
        from SimpleHTTPServer import SimpleHTTPRequestHandler

        class TestRequestHandler(SimpleHTTPRequestHandler):
            def log_message(self, format_, *args):
                log.append(args)

        from threading import Thread

        class TestServerThread(Thread):
            def __init__(self):
                self.ready = 0
                self.done = 0
                Thread.__init__(self)

            def run(self):
                from BaseHTTPServer import HTTPServer
                server = HTTPServer(('',_PORT), TestRequestHandler)
                self.ready = 1
                # serve one request at a time until asked to stop
                while not self.done:
                    server.handle_request()

        server_thread = TestServerThread()
        server_thread.start()
        # wait until the server thread reports that it is listening
        while not server_thread.ready:
            time.sleep(0.1)

        try:
            spiderPlanet()
        finally:
            server_thread.done = 1
            import urllib
            stop_url = 'http://127.0.0.1:%d/' % _PORT
            urllib.urlopen(stop_url).read()

        # keep the second logged argument of every 'GET ' record
        status = []
        for rec in log:
            if str(rec[0]).startswith('GET '):
                status.append(int(rec[1]))
        status.sort()
        self.assertEqual([200,200,200,200,404], status)

        self.verify_spiderPlanet()
Exemple #48
0
    def test_spiderFeed_blacklist(self):
        # An entry moved into the blacklist directory must not come back.
        entry_name = 'planet.intertwingly.net,2006,testfeed1,1'
        pattern = workdir + "/planet*"

        config.load(configfile)
        self.spiderFeed(testfeed % '1b')

        # verify that exactly four entries were produced
        produced = glob.glob(pattern)
        self.assertEqual(4, len(produced))

        # verify that the file names are as expected
        cached_entry = os.path.join(workdir, entry_name)
        self.assertTrue(os.path.exists(cached_entry))

        # move that entry into a new blacklist directory
        os.mkdir(os.path.join(workdir, "blacklist"))
        blacklisted_entry = os.path.join(workdir, "blacklist", entry_name)
        os.rename(cached_entry, blacklisted_entry)

        # a second pass must leave only three entries
        self.spiderFeed(testfeed % '1b')
        remaining = glob.glob(pattern)
        self.assertEqual(3, len(remaining))
Exemple #49
0
    def test_apply_asf(self):
        # Apply the 'asf' configuration to the prepared feed data.
        config.load(configfile % 'asf')
        splice.apply(self.feeddata)

        # verify that selected files are there
        for name in ['index.html', 'default.css', 'images/foaf.png']:
            path = os.path.join(workdir, name)
            self.assertTrue(os.path.exists(path))
            self.assertTrue(os.stat(path).st_size > 0, name + ' has size 0')

        # verify that index.html is well formed, has content, and xml:lang;
        # the with-block closes the file even if parsing raises
        with open(os.path.join(workdir, 'index.html')) as html:
            doc = minidom.parse(html)

        # count the content divs, and those among them marked as en-us
        content = lang = 0
        for div in doc.getElementsByTagName('div'):
            if div.getAttribute('class') != 'content': continue
            content += 1
            if div.getAttribute('xml:lang') == 'en-us': lang += 1
        self.assertEqual(12, content)
        self.assertEqual(3, lang)
Exemple #50
0
    def test_index_spider(self):
        # Start from an empty id index, spider, then inspect the index.
        import test_spider
        config.load(test_spider.configfile)

        # a freshly created index holds no entries
        index = idindex.create()
        self.assertEqual(0, len(index))
        index.close()

        from planet.spider import spiderPlanet
        try:
            spiderPlanet()

            index = idindex.open()
            self.assertEqual(12, len(index))

            feed1_key = 'planet.intertwingly.net,2006,testfeed1,1'
            self.assertEqual('tag:planet.intertwingly.net,2006:testfeed1',
                             index[feed1_key])

            feed3_key = 'planet.intertwingly.net,2006,testfeed3,1'
            feed3_uri = 'http://intertwingly.net/code/venus/tests/data/spider/testfeed3.rss'
            self.assertEqual(feed3_uri, index[feed3_key])
            index.close()
        finally:
            # remove the spider work directory, then its parent directory
            spider_dir = test_spider.workdir
            shutil.rmtree(spider_dir)
            parent_dir = os.path.split(spider_dir)[0]
            os.removedirs(parent_dir)
Exemple #51
0
    def test_index_splice(self):
        # Create the id index from the splice fixtures, alter it, re-splice.
        import test_splice
        config.load(test_splice.configfile)
        index = idindex.create()

        self.assertEqual(12, len(index))

        feed1_key = 'planet.intertwingly.net,2006,testfeed1,1'
        self.assertEqual('tag:planet.intertwingly.net,2006:testfeed1',
                         index[feed1_key])

        feed3_key = 'planet.intertwingly.net,2006,testfeed3,1'
        feed3_uri = 'http://intertwingly.net/code/venus/tests/data/spider/testfeed3.rss'
        self.assertEqual(feed3_uri, index[feed3_key])

        # swap the case of every stored value that mentions testfeed2
        for key in index.keys():
            stored = index[key]
            if stored.find('testfeed2') > 0:
                index[key] = stored.swapcase()
        index.close()

        from planet.splice import splice
        doc = splice()

        # element counts expected in the spliced document
        entries = doc.getElementsByTagName('entry')
        self.assertEqual(8, len(entries))
        sources = doc.getElementsByTagName('planet:source')
        self.assertEqual(4, len(sources))
        names = doc.getElementsByTagName('planet:name')
        self.assertEqual(12, len(names))
Exemple #52
0
                        source[0].childNodes[0].nodeValue
                doc.freeDoc()
            except:
                log.error(file)

    log.info(str(len(index.keys())) + " entries indexed")
    index.close()

    return open()


if __name__ == '__main__':
    # Command line entry point: <script> <config file> [-c|-d]
    #   -c  create the index
    #   -d  destroy the index
    #   otherwise: open the index and log how many entries it holds
    if len(sys.argv) < 2:
        # a parenthesised single-argument print works on Python 2 and 3;
        # the message now names the config file argument that is required
        print('Usage: %s config [-c|-d]' % sys.argv[0])
        sys.exit(1)

    config.load(sys.argv[1])

    if len(sys.argv) > 2 and sys.argv[2] == '-c':
        create()
    elif len(sys.argv) > 2 and sys.argv[2] == '-d':
        destroy()
    else:
        from planet import logger as log
        # open() is called without arguments, so it is presumably this
        # module's own open() rather than the builtin
        index = open()
        if index:
            log.info(str(len(index.keys())) + " entries indexed")
            index.close()
        else:
            log.info("no entries indexed")
Exemple #53
0
 def test_spiderUpdate(self):
     # Spider revision '1a' of the test feed, then '1b', and verify.
     config.load(configfile)
     for revision in ('1a', '1b'):
         self.spiderFeed(testfeed % revision)
     self.verify_spiderFeed()
Exemple #54
0
 def test_spiderPlanet(self):
     # Load the test configuration, spider every configured feed, and
     # delegate the checks to verify_spiderPlanet().
     config.load(configfile)
     spiderPlanet()
     self.verify_spiderPlanet()
Exemple #55
0
 def setUp(self):
     # every test in this case starts from the themed configuration
     config.load('tests/data/config/themed.ini')
Exemple #56
0
            expunge = 1
        elif arg == "-d" or arg == "--debug-splice":
            debug_splice = 1
        elif arg == "--no-publish":
            no_publish = 1
        elif arg.startswith("-"):
            print >> sys.stderr, "Unknown option:", arg
            sys.exit(1)
        else:
            config_file.append(arg)

    import locale
    locale.setlocale(locale.LC_ALL, "")

    from planet import config
    config.load(config_file or 'config.ini')

    if verbose:
        import planet
        planet.getLogger('DEBUG', config.log_format())

    if not offline:
        from planet import spider
        try:
            spider.spiderPlanet(only_if_new=only_if_new)
        except Exception, e:
            print e

    from planet import splice
    doc = splice.splice()
Exemple #57
0
a major change in the contract between stages
"""

import shutil, os, sys

# move up a directory
sys.path.insert(0, os.path.split(sys.path[0])[0])
os.chdir(sys.path[0])

# copy spider output to splice input
import planet
from planet import spider, config

planet.getLogger('CRITICAL', None)

config.load('tests/data/spider/config.ini')
spider.spiderPlanet()
if os.path.exists('tests/data/splice/cache'):
    shutil.rmtree('tests/data/splice/cache')
shutil.move('tests/work/spider/cache', 'tests/data/splice/cache')

source = open('tests/data/spider/config.ini')
dest1 = open('tests/data/splice/config.ini', 'w')
dest1.write(source.read().replace('/work/spider/', '/data/splice/'))
dest1.close()

source.seek(0)
dest2 = open('tests/work/apply_config.ini', 'w')
dest2.write(source.read().replace(
    '[Planet]', '''[Planet]
output_theme = asf
Exemple #58
0
 def setUp(self):
     # every test in this case starts from the rlist-csv configuration
     config.load('tests/data/config/rlist-csv.ini')