Example #1
0
    def do_scrape(self, juris, args, scrapers):
        """Run the jurisdiction scraper, then every requested scraper.

        Args:
            juris: jurisdiction object; ``juris.scrapers`` is indexed by
                scraper name to obtain the scraper class to instantiate.
            args: parsed options; ``args.module``, ``args.strict`` and
                ``args.fastmode`` are read here.
            scrapers: mapping of scraper name to a dict of keyword arguments
                forwarded to that scraper's ``do_scrape``.

        Returns:
            dict with the result of the jurisdiction scrape under the
            ``'jurisdiction'`` key and each scraper's ``do_scrape`` result
            under its scraper name.
        """
        # make output and cache dirs
        utils.makedirs(settings.CACHE_DIR)
        datadir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)
        utils.makedirs(datadir)

        # clear json from data dir so output of a previous run is not mixed
        # with this one; build the pattern with os.path.join like the
        # directory path above instead of concatenating strings
        for stale_json in glob.glob(os.path.join(datadir, '*.json')):
            os.remove(stale_json)

        report = {}

        # do jurisdiction
        jscraper = JurisdictionScraper(juris,
                                       datadir,
                                       strict_validation=args.strict,
                                       fastmode=args.fastmode)
        report['jurisdiction'] = jscraper.do_scrape()

        # run each requested scraper with its own keyword arguments
        for scraper_name, scrape_args in scrapers.items():
            ScraperCls = juris.scrapers[scraper_name]
            scraper = ScraperCls(juris,
                                 datadir,
                                 strict_validation=args.strict,
                                 fastmode=args.fastmode)
            report[scraper_name] = scraper.do_scrape(**scrape_args)

        return report
def test_jurisdiction_bicameral_scrape():
    """Scraping a FakeJurisdiction yields it plus five organizations."""
    jurisdiction = FakeJurisdiction()
    scraped = list(JurisdictionScraper(jurisdiction, '/tmp/').scrape())

    # gather the distinct names and tally how many objects of each type we got
    seen_names = {item.name for item in scraped}
    type_counts = defaultdict(int)
    for item in scraped:
        type_counts[type(item)] += 1

    # ensure Jurisdiction and 5 organizations were found
    expected_names = {
        'Test', 'Congress', 'House', 'Senate', 'Democratic', 'Republican',
    }
    assert seen_names == expected_names
    assert type_counts[FakeJurisdiction] == 1
    assert type_counts[Organization] == 5
Example #3
0
def test_jurisdiction_bicameral_scrape():
    """Scraping a FakeJurisdiction yields it plus three organizations."""
    jurisdiction = FakeJurisdiction()
    scraped = list(JurisdictionScraper(jurisdiction, '/tmp/').scrape())

    # gather the distinct names and tally how many objects of each type we got
    seen_names = {item.name for item in scraped}
    type_counts = defaultdict(int)
    for item in scraped:
        type_counts[type(item)] += 1

    # ensure the Jurisdiction and its 3 organizations were found
    expected_names = {'Test', 'Congress', 'House', 'Senate'}
    assert seen_names == expected_names
    assert type_counts[FakeJurisdiction] == 1
    assert type_counts[Organization] == 3
Example #4
0
def test_jurisdiction_unicam_scrape():
    """A jurisdiction declaring no organizations is expected to yield itself
    plus a single legislature organization sourced from its url."""

    class UnicameralJurisdiction(Jurisdiction):
        jurisdiction_id = 'unicam'
        name = 'Unicameral'
        url = 'http://example.com'

    unicam = UnicameralJurisdiction()
    scraped = list(JurisdictionScraper(unicam, '/tmp/').scrape())

    # exactly two objects come back, and the Jurisdiction is first
    assert len(scraped) == 2
    assert scraped[0] == unicam

    # the second one is the single legislature org made for the jurisdiction
    legislature = scraped[1]
    assert isinstance(legislature, Organization)
    assert legislature.classification == 'legislature'
    assert legislature.sources[0]['url'] == unicam.url
Example #5
0
    def do_scrape(self, juris, args, scrapers):
        """Run the jurisdiction scraper, then every requested scraper.

        Args:
            juris: jurisdiction object; ``juris.scrapers`` is indexed by
                scraper name to obtain the scraper class to instantiate.
            args: parsed options; ``args.module``, ``args.strict`` and
                ``args.fastmode`` are read here.
            scrapers: mapping of scraper name to a dict of keyword arguments
                forwarded to that scraper's ``do_scrape``.

        Returns:
            dict with the result of the jurisdiction scrape under the
            ``'jurisdiction'`` key and each scraper's ``do_scrape`` result
            under its scraper name.
        """
        # make output and cache dirs
        utils.makedirs(settings.CACHE_DIR)
        datadir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)
        utils.makedirs(datadir)

        # clear json from data dir so output of a previous run is not mixed
        # with this one; build the pattern with os.path.join like the
        # directory path above instead of concatenating strings
        for stale_json in glob.glob(os.path.join(datadir, '*.json')):
            os.remove(stale_json)

        report = {}

        # Pass the two flags by keyword: positional booleans are easy to
        # transpose and break silently if the constructor's parameters are
        # ever reordered or made keyword-only.
        # do jurisdiction
        jscraper = JurisdictionScraper(juris,
                                       datadir,
                                       strict_validation=args.strict,
                                       fastmode=args.fastmode)
        report['jurisdiction'] = jscraper.do_scrape()

        # run each requested scraper with its own keyword arguments
        for scraper_name, scrape_args in scrapers.items():
            ScraperCls = juris.scrapers[scraper_name]
            scraper = ScraperCls(juris,
                                 datadir,
                                 strict_validation=args.strict,
                                 fastmode=args.fastmode)
            report[scraper_name] = scraper.do_scrape(**scrape_args)

        return report
Example #6
0
def test_jurisdiction_unicam_scrape():
    """A jurisdiction that yields one legislature organization should be
    scraped as exactly two objects: itself, then that organization."""

    class UnicameralJurisdiction(Jurisdiction):
        jurisdiction_id = 'unicam'
        name = 'Unicameral'
        url = 'http://example.com'

        def get_organizations(self):
            yield Organization('Unicameral Legislature',
                               classification='legislature')

    unicam = UnicameralJurisdiction()
    scraped = list(JurisdictionScraper(unicam, '/tmp/').scrape())

    # exactly two objects come back, and the Jurisdiction is first
    assert len(scraped) == 2
    assert scraped[0] == unicam

    # the second one is the single legislature org
    legislature = scraped[1]
    assert isinstance(legislature, Organization)
    assert legislature.classification == 'legislature'