# Keep only real feed URLs: drop blank/whitespace-only lines and
# commented-out ('#'-prefixed) entries in one pass. (The previous
# two-stage filter let whitespace-only lines through, since "  " is
# truthy and does not start with '#'.)
urls = [url for url in urls if url.strip() and not url.strip().startswith('#')]

# Path to scraped feed data for this state. Wipe any previous run's
# output so stale feed files don't linger, then recreate the directory.
STATE_FEED_DATA = join('data', 'feeds')
shutil.rmtree(STATE_FEED_DATA, ignore_errors=True)
try:
    os.makedirs(STATE_FEED_DATA)
except OSError:
    # Best-effort: the directory may already exist (e.g. if rmtree was
    # blocked by permissions); any real problem will surface when the
    # feeds below are saved.
    pass

extractor = Extractor(abbr)
for url in urls:
    feed = Feed(url, jurisdiction=abbr)
    if not feed.is_valid():
        continue
    for entry in feed.entries():
        if entry.is_valid():
            # Run entity extraction on the raw entry, record the report,
            # and persist only entries that actually matched entities.
            extractor.process_entry(entry.entry)
            entry.finish_report(abbr)
            entry.save_if_entities_found()
    feed.finish_report()
    feed.save()