def people(
    year='2012',
    cache_name=None,
    throttle=None,
    no_commit=False,
):
    """Scrape the mandates for ``year`` and patch them into ``models.Mandate``.

    Every mandate yielded by ``MandateScraper.fetch(year)`` is turned into a
    row, linked to an existing ``models.Person`` (matched by name) and,
    unless it is a minority mandate, to a ``models.County`` (matched by
    name). Rows are handed to a ``TablePatcher`` keyed on
    ``(year, cdep_number)``.

    :param year: term to scrape, as a string. The ``interval`` column is
        only filled in when this is exactly ``'2012'``.
    :param cache_name: forwarded unchanged to ``create_session``.
    :param throttle: forwarded to ``create_session``; converted with
        ``float()`` when truthy, otherwise passed through as given.
    :param no_commit: when true, the database session is rolled back at the
        end instead of being committed.
    :raises RuntimeError: if no person matches the scraped name, or if a
        non-minority mandate names a county that cannot be found.
    """
    from mptracker.scraper.people import MandateScraper

    http_session = create_session(
        cache_name=cache_name,
        throttle=throttle and float(throttle),
    )
    mandate_scraper = MandateScraper(http_session)

    mandate_patcher = TablePatcher(
        models.Mandate,
        models.db.session,
        key_columns=['year', 'cdep_number'],
    )

    with mandate_patcher.process() as add_mandate:
        for mandate in mandate_scraper.fetch(year):
            row = mandate.as_dict([
                'year',
                'cdep_number',
                'minority',
                'college',
                'constituency',
            ])

            if year == '2012':
                # A mandate without an end date is treated as open-ended.
                end_date = mandate.end_date or date.max
                row['interval'] = DateRange(TERM_2012_START, end_date)

            # People are matched purely by name; the first hit wins.
            person = (
                models.Person.query
                .filter_by(name=mandate.person_name)
                .first())
            if person is None:
                raise RuntimeError("Can't find person named %r"
                                   % mandate.person_name)
            row['person_id'] = person.id

            # Minority mandates are not looked up against a county.
            if not mandate.minority:
                county = (
                    models.County.query
                    .filter_by(name=mandate.county_name)
                    .first())
                if county is None:
                    raise RuntimeError("Can't match county name %r"
                                       % mandate.county_name)
                row['county'] = county

            add_mandate(row)

    if no_commit:
        # `Logger.warn` is a deprecated alias; `warning` is the supported
        # spelling and logs the same message at the same level.
        logger.warning("Rolling back the transaction")
        models.db.session.rollback()
    else:
        models.db.session.commit()
def people(
    year='2012',
    cache_name=None,
    throttle=None,
    no_commit=False,
    add_people=False,
):
    """Scrape the mandates for ``year`` and patch mandates and people.

    Every mandate yielded by ``MandateScraper.fetch(year)`` is turned into a
    row for ``models.Mandate`` (patched on ``(year, cdep_number)``). The
    matching ``models.Person`` is looked up by name and its ``first_name``
    and ``last_name`` are patched through a second ``TablePatcher`` keyed on
    ``id``. Non-minority mandates are also linked to a ``models.County``
    matched by name.

    :param year: term to scrape, as a string. The ``interval`` column is
        only filled in when this is exactly ``'2012'``.
    :param cache_name: forwarded unchanged to ``create_session``.
    :param throttle: forwarded to ``create_session``; converted with
        ``float()`` when truthy, otherwise passed through as given.
    :param no_commit: when true, the database session is rolled back at the
        end instead of being committed.
    :param add_people: when true, a person that cannot be found by name is
        created on the fly; when false, a missing person is an error.
    :raises RuntimeError: if no person matches the scraped name and
        ``add_people`` is false, or if a non-minority mandate names a county
        that cannot be found.
    :raises AssertionError: if a mandate's ``chamber_number`` is not 2, or
        if the person patcher reports the person row as new.
    :raises KeyError: if a mandate is processed and no chamber with slug
        ``'cdep'`` exists.
    """
    from mptracker.scraper.people import MandateScraper

    http_session = create_session(
        cache_name=cache_name,
        throttle=throttle and float(throttle),
    )
    mandate_scraper = MandateScraper(http_session)

    mandate_patcher = TablePatcher(
        models.Mandate,
        models.db.session,
        key_columns=['year', 'cdep_number'],
    )

    person_patcher = TablePatcher(
        models.Person,
        models.db.session,
        key_columns=['id'],
    )

    new_people = 0
    chamber_by_slug = {c.slug: c for c in models.Chamber.query}

    with mandate_patcher.process() as add_mandate, \
            person_patcher.process() as add_person:
        for mandate in mandate_scraper.fetch(year):
            row = mandate.as_dict([
                'year',
                'cdep_number',
                'minority',
                'college',
                'constituency',
                'picture_url',
            ])

            # Every mandate is stored against the 'cdep' chamber, so the
            # scraper is only expected to yield chamber number 2.
            assert mandate.chamber_number == 2
            row['chamber_id'] = chamber_by_slug['cdep'].id

            if year == '2012':
                # A mandate without an end date is treated as open-ended.
                end_date = mandate.end_date or date.max
                row['interval'] = DateRange(TERM_2012_START, end_date)

            # People are matched purely by name; the first hit wins.
            person = (
                models.Person.query
                .filter_by(name=mandate.person_name)
                .first())
            if person is None:
                if add_people:
                    person = models.Person(name=mandate.person_name)
                    models.db.session.add(person)
                    # Flush now so `person.id` is populated for the rows
                    # built below (presumably a database-generated key).
                    models.db.session.flush()
                    new_people += 1
                else:
                    raise RuntimeError("Can't find person named %r"
                                       % mandate.person_name)

            # The patch call has side effects, so it must not live inside
            # the `assert`: under `python -O` asserts are stripped and the
            # name update would silently be skipped.
            person_result = add_person({
                'id': person.id,
                'first_name': mandate.person_first_name,
                'last_name': mandate.person_last_name,
            })
            # The person was found or created above, so the patcher is
            # expected to report an existing row here, never a new one.
            assert not person_result.is_new

            row['person_id'] = person.id

            # Minority mandates are not looked up against a county.
            if not mandate.minority:
                county = (
                    models.County.query
                    .filter_by(name=mandate.county_name)
                    .first())
                if county is None:
                    raise RuntimeError("Can't match county name %r"
                                       % mandate.county_name)
                row['county'] = county

            add_mandate(row)

    if new_people:
        logger.info("%d new people", new_people)

    if no_commit:
        # `Logger.warn` is a deprecated alias; `warning` is the supported
        # spelling and logs the same message at the same level.
        logger.warning("Rolling back the transaction")
        models.db.session.rollback()
    else:
        models.db.session.commit()