def do_scrape(self, juris, args, scrapers):
    # make output and cache dirs
    utils.makedirs(settings.CACHE_DIR)
    datadir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)
    utils.makedirs(datadir)
    # clear json from data dir
    for f in glob.glob(datadir + '/*.json'):
        os.remove(f)

    report = {}

    # do jurisdiction
    jscraper = JurisdictionScraper(juris, datadir,
                                   strict_validation=args.strict,
                                   fastmode=args.fastmode)
    report['jurisdiction'] = jscraper.do_scrape()

    for scraper_name, scrape_args in scrapers.items():
        ScraperCls = juris.scrapers[scraper_name]
        scraper = ScraperCls(juris, datadir,
                             strict_validation=args.strict,
                             fastmode=args.fastmode)
        report[scraper_name] = scraper.do_scrape(**scrape_args)

    return report
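
# A minimal, standalone sketch (not part of the source) of how the dispatch
# loop above consumes `scrapers`: each key names a registered scraper and each
# value is the kwargs dict forwarded to that scraper's do_scrape().
# FakeBillScraper, the registry dict, and all argument values are hypothetical.
class FakeBillScraper:
    def __init__(self, juris, datadir, strict_validation=True, fastmode=False):
        self.juris = juris
        self.datadir = datadir

    def do_scrape(self, session=None):
        # a real scraper would emit and save objects; return a summary instead
        return {'scraped': 0, 'session': session}


registry = {'bills': FakeBillScraper}          # stands in for juris.scrapers
scrapers = {'bills': {'session': '2013'}}      # scraper name -> scrape kwargs

report = {}
for scraper_name, scrape_args in scrapers.items():
    ScraperCls = registry[scraper_name]
    scraper = ScraperCls(None, '/tmp/scraped_data')
    report[scraper_name] = scraper.do_scrape(**scrape_args)

print(report)   # {'bills': {'scraped': 0, 'session': '2013'}}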

def do_scrape(self, juris, args, scrapers):
    # make output and cache dirs
    utils.makedirs(settings.CACHE_DIR)
    datadir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)
    utils.makedirs(datadir)
    # clear json from data dir
    for f in glob.glob(datadir + '/*.json'):
        os.remove(f)

    report = {}

    for scraper_name, scrape_args in scrapers.items():
        ScraperCls = juris.scrapers[scraper_name]
        scraper = ScraperCls(juris, datadir, args.strict, args.fastmode)
        report[scraper_name] = scraper.do_scrape(**scrape_args)

    return report

def do_scrape(self, juris, args):
    # make output and cache dirs
    utils.makedirs(args.cachedir)
    utils.makedirs(args.datadir)
    # clear json from data dir
    for f in glob.glob(args.datadir + '/*.json'):
        os.remove(f)

    report = {}

    # run scrapers
    for session in args.sessions:
        # get mapping of ScraperClass -> scraper types it should handle
        session_scrapers = defaultdict(list)
        for scraper_type in args.scrapers:
            ScraperCls = juris.get_scraper(args.term, session, scraper_type)
            if not ScraperCls:
                raise Exception('no scraper for term={0} session={1} '
                                'type={2}'.format(args.term, session,
                                                  scraper_type))
            session_scrapers[ScraperCls].append(scraper_type)

        report[session] = {}

        # run each scraper class once, covering every type mapped to it
        for ScraperCls, scraper_types in session_scrapers.items():
            scraper = ScraperCls(juris, session, args.datadir, args.cachedir,
                                 args.strict, args.fastmode)
            if 'people' in scraper_types:
                report[session].update(scraper.scrape_people())
            if 'bills' in scraper_types:
                report[session].update(scraper.scrape_bills())
            if 'events' in scraper_types:
                report[session].update(scraper.scrape_events())
            if 'votes' in scraper_types:
                report[session].update(scraper.scrape_votes())
            if 'speeches' in scraper_types:
                report[session].update(scraper.scrape_speeches())

    return report
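
# A minimal, standalone sketch (illustrative names only, not from the source)
# of the grouping step above: scraper types that resolve to the same scraper
# class are collected together so the class is instantiated once per session
# and asked to handle every type it supports.
from collections import defaultdict

# hypothetical result of juris.get_scraper() for each requested type
resolved = {
    'people': 'ExamplePersonScraper',
    'bills': 'ExampleBillScraper',
    'votes': 'ExampleBillScraper',    # votes handled by the bill scraper
}

session_scrapers = defaultdict(list)
for scraper_type, ScraperCls in resolved.items():
    session_scrapers[ScraperCls].append(scraper_type)

# -> {'ExamplePersonScraper': ['people'], 'ExampleBillScraper': ['bills', 'votes']}
print(dict(session_scrapers))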