Example no. 1
            # Predicate for comment lines.  NOTE(review): the name `ignored`
            # is misleading -- it returns True for URLs that do NOT start
            # with '#', so filter() *keeps* the non-comment lines.
            ignored = lambda url: not url.strip().startswith('#')
            urls = filter(ignored, urls)
            # Also drop falsy entries (e.g. empty strings).
            urls = filter(None, urls)

        # Path to scraped feed data for this state.
        STATE_FEED_DATA = join('data', 'feeds')

        # Recreate the feed-data directory from scratch: best-effort removal
        # of any previous contents, then create a fresh empty directory.
        # OSError is deliberately swallowed in both steps (directory may not
        # exist for rmtree, or may already exist for makedirs).
        try:
            shutil.rmtree(STATE_FEED_DATA)
        except OSError:
            pass

        try:
            os.makedirs(STATE_FEED_DATA)
        except OSError:
            pass

        # Process every surviving feed URL for this jurisdiction.
        # `abbr`, `urls`, `join`, `Feed`, and `Extractor` are defined outside
        # this view -- presumably a jurisdiction abbreviation and project
        # feed-scraping helpers; confirm against the enclosing function.
        extractor = Extractor(abbr)
        for url in urls:
            feed = Feed(url, jurisdiction=abbr)
            if not feed.is_valid():
                continue

            # For each valid entry: run entity extraction, finalize the
            # entry's report, and persist it (only if entities were found,
            # per the method name -- behavior defined elsewhere).
            for entry in feed.entries():
                if entry.is_valid():
                    extractor.process_entry(entry.entry)
                    entry.finish_report(abbr)
                    entry.save_if_entities_found()
            feed.finish_report()
            feed.save()