import os
import shutil
from os.path import join

# Read this state's feed URLs, skipping blank lines and '#' comments.
with open(join(PATH, 'urls', urls_filename)) as urls:
    urls = urls.read().splitlines()
not_comment = lambda url: not url.strip().startswith('#')
urls = filter(not_comment, urls)
urls = filter(None, urls)

# Path to scraped feed data for this state.
STATE_FEED_DATA = join('data', 'feeds')

# Wipe and recreate the output directory; ignore the OSError raised if it
# doesn't exist yet (rmtree) or already exists (makedirs).
try:
    shutil.rmtree(STATE_FEED_DATA)
except OSError:
    pass
try:
    os.makedirs(STATE_FEED_DATA)
except OSError:
    pass

extractor = Extractor(abbr)
for url in urls:
    feed = Feed(url)
    for entry in feed.entries():
        extractor.process_entry(entry.entry)
        entry.finish_report(abbr)
        entry.save_if_entities_found()
    feed.finish_report()
    feed.save()
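# ---------------------------------------------------------------------------
# Sketch of the interfaces assumed above. Feed, Extractor, PATH,
# urls_filename, and abbr are defined elsewhere in this project; this
# skeleton only mirrors the calls made in the loop above, with placeholder
# bodies. It is an assumption about the shape of the API, not the real
# implementation.

class Extractor(object):
    def __init__(self, abbr):
        self.abbr = abbr  # state abbreviation (hypothetical attribute)

    def process_entry(self, entry):
        """Scan a raw feed entry for entity mentions (placeholder)."""

class Feed(object):
    def __init__(self, url):
        self.url = url

    def entries(self):
        """Yield entry wrappers exposing .entry, .finish_report(abbr), and
        .save_if_entities_found(), per the calls above (placeholder)."""
        return iter([])

    def finish_report(self):
        """Summarize scrape results for this feed (placeholder)."""

    def save(self):
        """Persist feed data under STATE_FEED_DATA (placeholder)."""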