def test_spiderFeedUpdatedEntries(self):
    config.load(configfile)
    self.spiderFeed(testfeed % '4')
    self.assertEqual(2, len(glob.glob(workdir + "/*")))
    data = feedparser.parse(workdir +
        '/planet.intertwingly.net,2006,testfeed4')
    self.assertEqual(u'three', data.entries[0].content[0].value)
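# Several tests in this excerpt call a spiderFeed helper that is not shown
# here. A minimal sketch, assuming it parses a single feed file and writes
# the resulting entries into the configured cache via
# planet.spider.writeCache; the actual helper in the test suite may differ:
def spiderFeed(self, feed_uri):
    from planet.spider import writeCache
    feed_info = dict(config.parser.items('Planet'))
    data = feedparser.parse(feed_uri)
    writeCache(feed_uri, feed_info, data)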
def test_apply_filter(self):
    config.load(configfile % 'filter')
    splice.apply(self.feeddata)

    # verify that index.html is well formed, has content, and xml:lang
    html = open(os.path.join(workdir, 'index.html')).read()
    self.assertTrue(html.find(' href="http://example.com/default.css"') >= 0)
def test_django_config_context(self):
    config.load('tests/data/filter/django/test.ini')
    feed = open('tests/data/filter/django/test.xml')
    input = feed.read(); feed.close()
    results = dj.run(
        os.path.realpath('tests/data/filter/django/config.html.dj'), input)
    self.assertEqual(results, "Django on Venus\n")
def test_django_entry_title(self): config.load("tests/data/filter/django/test.ini") feed = open("tests/data/filter/django/test.xml") input = feed.read() feed.close() results = dj.run(os.path.realpath("tests/data/filter/django/title.html.dj"), input) self.assertEqual(results, u"\xa1Atom-Powered <b>Robots</b> Run Amok!\n")
def test_invalid_default_license(self):
    config.load(configfile)
    for testfeed in testfeeds:
        doc = splice.splice()
        splice.apply(doc.toxml('utf-8'))
        output = open(os.path.join(workdir, 'index.html')).read()
        self.assertTrue('title="License information">License</a>' in output)
def test_apply_filter_mememe(self):
    config.load(configfile % 'mememe')
    self.apply_fancy()

    with open(os.path.join(workdir, 'index.html')) as html:
        self.assertTrue(html.read().find(
            '<div class="sidebar"><h2>Memes <a href="memes.atom">') >= 0)
def test_spiderFeed_retroactive_filter(self):
    config.load(configfile)
    self.spiderFeed(testfeed % '1b')
    self.assertEqual(5, len(glob.glob(workdir + "/*")))

    config.parser.set('Planet', 'filter', 'two')
    self.spiderFeed(testfeed % '1b')
    self.assertEqual(1, len(glob.glob(workdir + "/*")))
def test_embedded_license(self):
    config.load(configfile)
    for testfeed in testfeeds:
        doc = splice.splice()
        splice.apply(doc.toxml('utf-8'))
        output = open(os.path.join(workdir, 'index.html')).read()
        self.assertTrue('Attribution 3.0 Unported' in output)
def test_django_entry_title_autoescape_off(self): config.load("tests/data/filter/django/test.ini") config.parser.set("Planet", "django_autoescape", "off") feed = open("tests/data/filter/django/test.xml") input = feed.read() feed.close() results = dj.run(os.path.realpath("tests/data/filter/django/title.html.dj"), input) self.assertEqual(results, u"\xa1Atom-Powered <b>Robots</b> Run Amok!\n")
def test_online_accounts(self):
    config.load('tests/data/config/foaf.ini')
    feeds = config.subscriptions()
    feeds.sort()
    self.assertEqual([
        'http://api.flickr.com/services/feeds/' +
            'photos_public.gne?id=77366516@N00',
        'http://del.icio.us/rss/eliast',
        'http://torrez.us/feed/rdf'
    ], feeds)
def test_xslt_filter(self):
    config.load('tests/data/filter/translate.ini')
    testfile = 'tests/data/filter/category-one.xml'

    input = open(testfile).read()
    output = shell.run(config.filters()[0], input, mode="filter")
    dom = xml.dom.minidom.parseString(output)
    catterm = dom.getElementsByTagName('category')[0].getAttribute('term')
    self.assertEqual('OnE', catterm)
def test_apply_filter_html(self):
    config.load(configfile % 'html')
    self.apply_asf()

    with open(os.path.join(workdir, 'index.html')) as html:
        self.assertTrue(html.read().find('/>') >= 0)

    with open(os.path.join(workdir, 'index.html4')) as html:
        self.assertTrue(html.read().find('/>') < 0)
def test_recursive(self):
    config.load('tests/data/config/foaf-deep.ini')
    feeds = config.subscriptions()
    feeds.sort()
    self.assertEqual([
        'http://api.flickr.com/services/feeds/photos_public.gne?id=77366516@N00',
        'http://del.icio.us/rss/eliast',
        'http://del.icio.us/rss/leef',
        'http://del.icio.us/rss/rubys',
        'http://intertwingly.net/blog/atom.xml',
        'http://thefigtrees.net/lee/life/atom.xml',
        'http://torrez.us/feed/rdf'
    ], feeds)
def test_default_license_lookup(self):
    config.load(configfile)
    expected_text = 'Reconocimiento-No comercial-Compartir bajo la' + \
        ' misma licencia 2.5 Colombia'
    for testfeed in testfeeds:
        doc = splice.splice()
        splice.apply(doc.toxml('utf-8'))
        output = open(os.path.join(workdir, 'index.html')).read()
        self.assertTrue(expected_text in output)
def test_stripAd_yahoo(self):
    testfile = 'tests/data/filter/stripAd-yahoo.xml'
    config.load('tests/data/filter/stripAd-yahoo.ini')

    output = open(testfile).read()
    for filter in config.filters():
        output = shell.run(filter, output, mode="filter")

    dom = xml.dom.minidom.parseString(output)
    excerpt = dom.getElementsByTagName('content')[0]
    self.assertEqual(u'before--after',
        excerpt.firstChild.firstChild.nodeValue)
def test_multiple_subscriptions(self):
    config.load('tests/data/config/foaf-multiple.ini')
    self.assertEqual(2, len(config.reading_lists()))

    feeds = config.subscriptions()
    feeds.sort()
    self.assertEqual(5, len(feeds))
    self.assertEqual([
        'http://api.flickr.com/services/feeds/' +
            'photos_public.gne?id=77366516@N00',
        'http://api.flickr.com/services/feeds/' +
            'photos_public.gne?id=SOMEID',
        'http://del.icio.us/rss/SOMEID',
        'http://del.icio.us/rss/eliast',
        'http://torrez.us/feed/rdf'
    ], feeds)
def setUp(self):
    config.load(configfile)

    # refetch and/or parse the test feeds
    spider.spiderPlanet()
    global testfeeds
    testfeeds = glob.glob(spider_workdir + "/*")

    try:
        os.makedirs(workdir)
    except:
        self.tearDown()
        os.makedirs(workdir)
def test_excerpt_lorem_ipsum(self):
    testfile = 'tests/data/filter/excerpt-lorem-ipsum.xml'
    config.load('tests/data/filter/excerpt-lorem-ipsum.ini')

    output = open(testfile).read()
    for filter in config.filters():
        output = shell.run(filter, output, mode="filter")

    dom = xml.dom.minidom.parseString(output)
    excerpt = dom.getElementsByTagName('planet:excerpt')[0]
    self.assertEqual(u'Lorem ipsum dolor sit amet, consectetuer ' +
        u'adipiscing elit. Nullam velit. Vivamus tincidunt, erat ' +
        u'in \u2026', excerpt.firstChild.firstChild.nodeValue)
def test_expunge(self):
    config.load(configfile)

    # create test entries in cache with correct timestamp
    for entry in glob.glob(testentries):
        e = minidom.parse(entry)
        e.normalize()
        eid = e.getElementsByTagName('id')
        eupdated = e.getElementsByTagName('updated')
        if not eid or not eupdated: continue
        efile = filename(workdir, eid[0].childNodes[0].nodeValue)
        emtime = time.mktime(feedparser._parse_date_w3dtf(
            eupdated[0].childNodes[0].nodeValue))
        shutil.copyfile(entry, efile)
        os.utime(efile, (emtime, emtime))

    # create test feeds in cache
    sources = config.cache_sources_directory()
    for feed in glob.glob(testfeeds):
        f = minidom.parse(feed)
        f.normalize()
        fid = f.getElementsByTagName('id')
        if not fid: continue
        ffile = filename(sources, fid[0].childNodes[0].nodeValue)
        shutil.copyfile(feed, ffile)

    # verify that exactly nine entries + one source dir were produced
    files = glob.glob(workdir + "/*")
    self.assertEqual(10, len(files))

    # verify that exactly four feeds were produced in source dir
    files = glob.glob(sources + "/*")
    self.assertEqual(4, len(files))

    # expunge...
    expungeCache()

    # verify that five entries and one source dir are left
    files = glob.glob(workdir + "/*")
    self.assertEqual(6, len(files))

    # verify that the right five entries are left
    self.assertTrue(os.path.join(workdir,
        'bzr.mfd-consult.dk,2007,venus-expunge-test1,1') in files)
    self.assertTrue(os.path.join(workdir,
        'bzr.mfd-consult.dk,2007,venus-expunge-test2,1') in files)
    self.assertTrue(os.path.join(workdir,
        'bzr.mfd-consult.dk,2007,venus-expunge-test3,3') in files)
    self.assertTrue(os.path.join(workdir,
        'bzr.mfd-consult.dk,2007,venus-expunge-test4,2') in files)
    self.assertTrue(os.path.join(workdir,
        'bzr.mfd-consult.dk,2007,venus-expunge-test4,3') in files)
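# Note: the filename helper used above comes from planet.spider; it flattens
# an entry id into a safe cache file name. Illustratively (the exact mapping
# rules live in planet.spider), an id such as
#   'tag:bzr.mfd-consult.dk,2007:venus-expunge-test1,1'
# becomes a cache file named
#   'bzr.mfd-consult.dk,2007,venus-expunge-test1,1'
# under the given directory.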
def test_coerce_rss(self):
    config.load(configfile)

    # load first version of RSS
    self.spiderFeed(testfeed % "a-rss-1")
    rss_no_date_expected = self.verify_date(
        "fake.url.example.com,rss-no-date")
    self.verify_date("fake.url.example.com,rss-changing-date",
        u"2011-12-01T11:00:00Z")

    # parse updated RSS feed
    self.spiderFeed(testfeed % "a-rss-2")

    # verify dates haven't changed
    self.verify_date("fake.url.example.com,rss-no-date",
        rss_no_date_expected)
    self.verify_date("fake.url.example.com,rss-changing-date",
        u"2011-12-01T11:00:00Z")
def test_apply_fancy(self):
    config.load(configfile % 'fancy')
    splice.apply(self.feeddata)

    # verify that selected files are there
    for file in ['index.html', 'planet.css', 'images/jdub.png']:
        path = os.path.join(workdir, file)
        self.assertTrue(os.path.exists(path), path)
        self.assertTrue(os.stat(path).st_size > 0)

    # verify that index.html is well formed, has content, and xml:lang
    html = open(os.path.join(workdir, 'index.html')).read()
    self.assertTrue(html.find('<h1>test planet</h1>') >= 0)
    self.assertTrue(html.find(
        '<h4><a href="http://example.com/2">Venus</a></h4>') >= 0)
def test_excerpt_lorem_ipsum_summary(self):
    testfile = 'tests/data/filter/excerpt-lorem-ipsum.xml'
    config.load('tests/data/filter/excerpt-lorem-ipsum.ini')
    config.parser.set('excerpt.py', 'target', 'atom:summary')

    with open(testfile) as fp:
        output = fp.read()
    for each_filter in config.filters():
        output = shell.run(each_filter, output, mode="filter")

    dom = xml.dom.minidom.parseString(output)
    excerpt = dom.getElementsByTagName('summary')[0]
    self.assertEqual(u'Lorem ipsum dolor sit amet, consectetuer ' +
        u'adipiscing elit. Nullam velit. Vivamus tincidunt, erat ' +
        u'in \u2026', excerpt.firstChild.firstChild.nodeValue)
def test_regexp_filter2(self):
    config.load('tests/data/filter/regexp-sifter2.ini')

    testfile = 'tests/data/filter/category-one.xml'
    output = open(testfile).read()
    for filter in config.filters():
        output = shell.run(filter, output, mode="filter")
    self.assertNotEqual('', output)

    testfile = 'tests/data/filter/category-two.xml'
    output = open(testfile).read()
    for filter in config.filters():
        output = shell.run(filter, output, mode="filter")
    self.assertEqual('', output)
def test_spiderThreads(self):
    config.load(configfile.replace('config', 'threaded'))
    _PORT = config.parser.getint('Planet', 'test_port')

    log = []
    from SimpleHTTPServer import SimpleHTTPRequestHandler
    class TestRequestHandler(SimpleHTTPRequestHandler):
        def log_message(self, format_, *args):
            log.append(args)

    from threading import Thread
    class TestServerThread(Thread):
        def __init__(self):
            self.ready = 0
            self.done = 0
            Thread.__init__(self)
        def run(self):
            from BaseHTTPServer import HTTPServer
            httpd_ = HTTPServer(('', _PORT), TestRequestHandler)
            self.ready = 1
            while not self.done:
                httpd_.handle_request()

    httpd = TestServerThread()
    httpd.start()
    while not httpd.ready:
        time.sleep(0.1)

    try:
        spiderPlanet()
    finally:
        httpd.done = 1
        import urllib
        urllib.urlopen('http://127.0.0.1:%d/' % _PORT).read()

    status = [int(rec[1]) for rec in log if str(rec[0]).startswith('GET ')]
    status.sort()
    self.assertEqual([200, 200, 200, 200, 404], status)

    self.verify_spiderPlanet()
def test_coerce_atom(self):
    config.load(configfile)

    # load first version of Atom
    self.spiderFeed(testfeed % "b-atom-1")
    atom_no_date_expected = self.verify_date(
        "fake.url.example.com,atom-no-date")
    self.verify_date("fake.url.example.com,atom-changing-published",
        u"2011-12-08T02:02:28Z")
    self.verify_date("fake.url.example.com,atom-changing-updated",
        u"2011-11-09T00:00:28Z")
    self.verify_date("fake.url.example.com,atom-update-before-pub",
        u"2011-11-11T11:11:11Z")

    # parse updated Atom feed
    self.spiderFeed(testfeed % "b-atom-2")

    # verify dates haven't changed
    self.verify_date("fake.url.example.com,atom-no-date",
        atom_no_date_expected)
    self.verify_date("fake.url.example.com,atom-changing-published",
        u"2011-12-08T02:02:28Z")
    self.verify_date("fake.url.example.com,atom-changing-updated",
        u"2011-11-09T00:00:28Z")
    self.verify_date("fake.url.example.com,atom-update-before-pub",
        u"2011-11-11T11:11:11Z")
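# The two coerce tests above rely on a verify_date helper that this excerpt
# does not define. A minimal sketch, assuming the helper parses the named
# cache entry with feedparser, asserts the updated date when an expected
# value is given, and returns the date found so a later pass can check that
# it has not changed (the real helper in the test suite may differ):
def verify_date(self, entry_id, expected=None):
    data = feedparser.parse(os.path.join(workdir, entry_id))
    date = data.entries[0].updated  # w3dtf date string from the cached entry
    if expected is not None:
        self.assertEqual(expected, date)
    return date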
def eval_config(self, name):
    # read the test case
    try:
        with open(testfiles % (name, 'ini')) as testcasefile:
            data = testcasefile.read()
        description, expect = self.desc_config_re.search(data).groups()
    except:
        raise RuntimeError("can't parse %s" % name)

    # map to template info
    config.load(testfiles % (name, 'ini'))
    results = tmpl.template_info("<feed/>")

    # verify the results
    if not self.simple_re.match(expect):
        self.assertTrue(eval(expect, results), expect)
    else:
        lhs, rhs = self.simple_re.match(expect).groups()
        self.assertEqual(eval(rhs), eval(lhs, results))
def test_spiderFeed_blacklist(self):
    config.load(configfile)
    self.spiderFeed(testfeed % '1b')

    # verify that exactly four entries were produced
    self.assertEqual(4, len(glob.glob(workdir + "/planet*")))

    # verify that the file names are as expected
    self.assertTrue(os.path.exists(os.path.join(workdir,
        'planet.intertwingly.net,2006,testfeed1,1')))

    os.mkdir(os.path.join(workdir, "blacklist"))
    os.rename(os.path.join(workdir,
        'planet.intertwingly.net,2006,testfeed1,1'),
        os.path.join(workdir, "blacklist",
        'planet.intertwingly.net,2006,testfeed1,1'))

    self.spiderFeed(testfeed % '1b')
    self.assertEqual(3, len(glob.glob(workdir + "/planet*")))
def test_index_splice(self):
    import test_splice
    config.load(test_splice.configfile)
    index = idindex.create()

    self.assertEqual(12, len(index))
    self.assertEqual('tag:planet.intertwingly.net,2006:testfeed1',
        index['planet.intertwingly.net,2006,testfeed1,1'])
    self.assertEqual(
        'http://intertwingly.net/code/venus/tests/data/spider/testfeed3.rss',
        index['planet.intertwingly.net,2006,testfeed3,1'])

    for key in index.keys():
        value = index[key]
        if value.find('testfeed2') > 0:
            index[key] = value.swapcase()
    index.close()

    from planet.splice import splice
    doc = splice()

    self.assertEqual(8, len(doc.getElementsByTagName('entry')))
    self.assertEqual(4, len(doc.getElementsByTagName('planet:source')))
    self.assertEqual(12, len(doc.getElementsByTagName('planet:name')))
def test_index_spider(self):
    import test_spider
    config.load(test_spider.configfile)

    index = idindex.create()
    self.assertEqual(0, len(index))
    index.close()

    from planet.spider import spiderPlanet
    try:
        spiderPlanet()

        index = idindex.open()
        self.assertEqual(12, len(index))
        self.assertEqual('tag:planet.intertwingly.net,2006:testfeed1',
            index['planet.intertwingly.net,2006,testfeed1,1'])
        self.assertEqual(
            'http://intertwingly.net/code/venus/tests/data/spider/testfeed3.rss',
            index['planet.intertwingly.net,2006,testfeed3,1'])
        index.close()
    finally:
        import os, shutil
        shutil.rmtree(test_spider.workdir)
        os.removedirs(os.path.split(test_spider.workdir)[0])
def test_apply_asf(self):
    config.load(configfile % 'asf')
    splice.apply(self.feeddata)

    # verify that selected files are there
    for file in ['index.html', 'default.css', 'images/foaf.png']:
        path = os.path.join(workdir, file)
        self.assertTrue(os.path.exists(path))
        self.assertTrue(os.stat(path).st_size > 0, file + ' has size 0')

    # verify that index.html is well formed, has content, and xml:lang
    html = open(os.path.join(workdir, 'index.html'))
    doc = minidom.parse(html)
    content = lang = 0
    for div in doc.getElementsByTagName('div'):
        if div.getAttribute('class') != 'content': continue
        content += 1
        if div.getAttribute('xml:lang') == 'en-us': lang += 1
    html.close()

    self.assertEqual(12, content)
    self.assertEqual(3, lang)
                source[0].childNodes[0].nodeValue
            doc.freeDoc()
        except:
            log.error(file)

    log.info(str(len(index.keys())) + " entries indexed")
    index.close()

    return open()

if __name__ == '__main__':
    if len(sys.argv) < 2:
        print 'Usage: %s [-c|-d]' % sys.argv[0]
        sys.exit(1)

    config.load(sys.argv[1])

    if len(sys.argv) > 2 and sys.argv[2] == '-c':
        create()
    elif len(sys.argv) > 2 and sys.argv[2] == '-d':
        destroy()
    else:
        from planet import logger as log
        index = open()
        if index:
            log.info(str(len(index.keys())) + " entries indexed")
            index.close()
        else:
            log.info("no entries indexed")
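# Illustrative command lines for the module above, run from a Venus checkout
# (the config file name is an example):
#
#   python planet/idindex.py config.ini -c    # create the id index
#   python planet/idindex.py config.ini -d    # destroy the id index
#   python planet/idindex.py config.ini       # report how many entries are indexed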
def test_spiderUpdate(self):
    config.load(configfile)

    self.spiderFeed(testfeed % '1a')
    self.spiderFeed(testfeed % '1b')
    self.verify_spiderFeed()
def test_spiderPlanet(self):
    config.load(configfile)
    spiderPlanet()
    self.verify_spiderPlanet()
def setUp(self):
    config.load('tests/data/config/themed.ini')
            expunge = 1
        elif arg == "-d" or arg == "--debug-splice":
            debug_splice = 1
        elif arg == "--no-publish":
            no_publish = 1
        elif arg.startswith("-"):
            print >> sys.stderr, "Unknown option:", arg
            sys.exit(1)
        else:
            config_file.append(arg)

    import locale
    locale.setlocale(locale.LC_ALL, "")

    from planet import config
    config.load(config_file or 'config.ini')

    if verbose:
        import planet
        planet.getLogger('DEBUG', config.log_format())

    if not offline:
        from planet import spider
        try:
            spider.spiderPlanet(only_if_new=only_if_new)
        except Exception, e:
            print e

    from planet import splice
    doc = splice.splice()
a major change in the contract between stages
"""
import shutil, os, sys

# move up a directory
sys.path.insert(0, os.path.split(sys.path[0])[0])
os.chdir(sys.path[0])

# copy spider output to splice input
import planet
from planet import spider, config
planet.getLogger('CRITICAL', None)

config.load('tests/data/spider/config.ini')
spider.spiderPlanet()
if os.path.exists('tests/data/splice/cache'):
    shutil.rmtree('tests/data/splice/cache')
shutil.move('tests/work/spider/cache', 'tests/data/splice/cache')

source = open('tests/data/spider/config.ini')
dest1 = open('tests/data/splice/config.ini', 'w')
dest1.write(source.read().replace('/work/spider/', '/data/splice/'))
dest1.close()

source.seek(0)
dest2 = open('tests/work/apply_config.ini', 'w')
dest2.write(source.read().replace(
    '[Planet]',
    '''[Planet]
output_theme = asf
def setUp(self):
    config.load('tests/data/config/rlist-csv.ini')