Example #1
        with open(join(PATH, 'urls', urls_filename)) as urls:
            urls = urls.read().splitlines()
            ignored = lambda url: not url.strip().startswith('#')
            urls = filter(ignored, urls)
            urls = filter(None, urls)

        # Path to scraped feed data for this state.
        STATE_FEED_DATA = join('data', 'feeds')

        try:
            shutil.rmtree(STATE_FEED_DATA)
        except OSError:
            pass

        try:
            os.makedirs(STATE_FEED_DATA)
        except OSError:
            pass

        extractor = Extractor(abbr)
        for url in urls:
            feed = Feed(url, jurisdiction=abbr)
            import ipdb; ipdb.set_trace()  # Debugger breakpoint left in the source.
            et = list(feed.entries())      # Materialize once to test for an empty feed.
            if not et:
                import ipdb; ipdb.set_trace()  # Break again when the feed has no entries.
            for entry in feed.entries():
                extractor.process_entry(entry.entry)
                entry.finish_report(abbr)
                entry.save_if_entities_found()
            feed.finish_report()
            feed.save()
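
The paired try/except OSError blocks are a common "reset this directory" idiom: wipe any output left by a previous run, then recreate the directory empty, swallowing the error raised when there is nothing to delete (rmtree) or the directory already exists (makedirs). On Python 3.2 and later the standard library expresses both cases directly; a minimal equivalent sketch, using the same STATE_FEED_DATA path:

    import os
    import shutil
    from os.path import join

    STATE_FEED_DATA = join('data', 'feeds')

    # Remove previous output; ignore_errors covers "nothing to delete".
    shutil.rmtree(STATE_FEED_DATA, ignore_errors=True)

    # Recreate the now-empty directory; exist_ok covers "already exists".
    os.makedirs(STATE_FEED_DATA, exist_ok=True)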

Example #2
        with open(join(PATH, 'urls', urls_filename)) as urls:
            urls = urls.read().splitlines()
            ignored = lambda url: not url.strip().startswith('#')
            urls = filter(ignored, urls)
            urls = filter(None, urls)

        # Path to scraped feed data for this state.
        STATE_FEED_DATA = join('data', 'feeds')

        try:
            shutil.rmtree(STATE_FEED_DATA)
        except OSError:
            pass

        try:
            os.makedirs(STATE_FEED_DATA)
        except OSError:
            pass

        extractor = Extractor(abbr)
        for url in urls:
            feed = Feed(url)
            for entry in feed.entries():
                extractor.process_entry(entry.entry)
                import ipdb; ipdb.set_trace()  # Debugger breakpoint left in the source.
                entry.finish_report(abbr)
                entry.save_if_entities_found()
            feed.finish_report()
            feed.save()
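
The with-block at the top of each example treats the urls file as a plain line-oriented list: lines beginning with # are comments, and filter(None, ...) drops the empty strings left by blank lines. Under Python 3 both filter() calls return lazy iterators rather than lists; a self-contained eager equivalent, with a hypothetical file name standing in for join(PATH, 'urls', urls_filename), might look like:

    from os.path import join

    # Hypothetical path; the examples build it as join(PATH, 'urls', urls_filename).
    with open(join('urls', 'ca.txt')) as f:
        lines = f.read().splitlines()

    # Keep only lines that are non-empty and not comments.
    urls = [line for line in lines
            if line and not line.strip().startswith('#')]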

Example #3
        with open(join(PATH, 'urls', urls_filename)) as urls:
            urls = urls.read().splitlines()
            ignored = lambda url: not url.strip().startswith('#')
            urls = filter(ignored, urls)
            urls = filter(None, urls)

        # Path to scraped feed data for this state.
        STATE_FEED_DATA = join('data', 'feeds')

        try:
            shutil.rmtree(STATE_FEED_DATA)
        except OSError:
            pass

        try:
            os.makedirs(STATE_FEED_DATA)
        except OSError:
            pass

        extractor = Extractor(abbr)
        for url in urls:
            feed = Feed(url)
            for entry in feed.entries():
                extractor.process_entry(entry.entry)
                import ipdb
                ipdb.set_trace()  # Debugger breakpoint left in the source.
                entry.finish_report(abbr)
                entry.save_if_entities_found()
            feed.finish_report()
            feed.save()
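
All three examples still contain ipdb breakpoints: each import ipdb; ipdb.set_trace() halts execution at that line and opens the interactive ipdb debugger, so these are debugging leftovers rather than part of the feed-processing logic. On Python 3.7 and later the built-in breakpoint() is the more idiomatic hook, because it can be redirected or disabled from the environment without editing the code; a minimal sketch (scrape.py is a hypothetical script name):

    # Instead of:  import ipdb; ipdb.set_trace()
    breakpoint()  # Python 3.7+; defaults to pdb.set_trace()

    # Choose the debugger, or silence all breakpoints, via the environment:
    #   PYTHONBREAKPOINT=ipdb.set_trace python scrape.py
    #   PYTHONBREAKPOINT=0 python scrape.py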