def run(self):
    """Run the main processing loop until a shutdown is requested.

    Each cycle calls ``blogpostprocessor.run()``, asks the scraper for the
    next source and scrapes it if one is returned, then pauses: 10 seconds
    after a source was handled, 60 seconds when there was nothing to scrape.
    The pause is slept one second at a time; if ``killer.kill_now`` is set
    during the pause, ``self.stop()`` is called and the method returns.
    After each pause, ``browser.stop_browser()`` is called once more than
    ``restart_browser_interval`` seconds have elapsed since the last reset.
    """
    killer = GracefulKiller()
    restart_browser_interval = 900  # seconds between browser resets
    # Use the monotonic clock for elapsed-time measurement: unlike
    # time.time(), it cannot jump when the system clock is adjusted, so the
    # reset interval is neither cut short nor stretched by clock changes.
    browser_starttime = time.monotonic()
    while True:
        # process new blog posts:
        blogpostprocessor.run()
        # look for new papers:
        source = scraper.next_source()
        if source:
            scraper.scrape(source)
        # wait: short pause after doing work, longer pause when idle.
        # Sleeping in one-second steps keeps shutdown latency low.
        pause_secs = 10 if source else 60
        for _ in range(pause_secs):
            if killer.kill_now:
                self.stop()
                return
            time.sleep(1)
        # restart browser?
        # NOTE(review): only stop_browser() is called here; presumably the
        # browser is started again on next use -- confirm.
        if time.monotonic() - browser_starttime > restart_browser_interval:
            browser.stop_browser()
            browser_starttime = time.monotonic()
def test_next_source(testdb):
    """Check that the scraper's next source has the expected URL."""
    expected_url = 'http://consc.net/papers.html'
    next_src = scraper.next_source()
    assert next_src.url == expected_url