def upgrade():
    """Add a ``slug`` column to ``person`` and fill it in for existing rows.

    The column is created nullable with a unique constraint, every existing
    row receives a slug produced by ``generate_slug`` from its ``name``, and
    finally the column is switched to NOT NULL.
    """
    op.add_column('person', sa.Column('slug', sa.Text(), nullable=True))
    op.create_unique_constraint('person_unique_slug', 'person', ['slug'])

    from mptracker.common import generate_slug

    # Slugs handed out so far during this run; consulted by the validator
    # passed to generate_slug.
    taken_slugs = set()

    def is_available(candidate):
        return candidate not in taken_slugs

    connection = op.get_bind()
    # Materialise the result set before issuing UPDATEs on the same
    # connection.
    people = list(connection.execute("SELECT id, name FROM person"))
    for person_id, person_name in people:
        new_slug = generate_slug(person_name, is_available)
        connection.execute(
            "UPDATE person SET slug=%s WHERE id=%s",
            new_slug,
            person_id,
        )
        taken_slugs.add(new_slug)

    op.alter_column('person', 'slug', nullable=False)
def test_simple_slug():
    """Check ``generate_slug`` output for a handful of plain inputs."""
    from mptracker.common import generate_slug

    # (input, expected slug) pairs, checked in order.
    cases = [
        ('foo', 'foo'),
        (' bar ', 'bar'),
        ('Foo Bar', 'foo-bar'),
        ('Foo Bar bAz', 'foo-bar-baz'),
        ('"wtf"', 'wtf'),
        ("___-%", '-'),
    ]
    for raw, expected in cases:
        assert generate_slug(raw) == expected
def test_sequential_limit():
    """``generate_slug`` raises RuntimeError when no candidate is accepted."""
    from mptracker.common import generate_slug

    def reject_everything(candidate):
        # Validator that never accepts a candidate slug.
        return False

    with pytest.raises(RuntimeError):
        generate_slug('foo', reject_everything)
def test_sequential():
    """Repeated slugs for the same name get ``-1``, ``-2``, ... suffixes."""
    from mptracker.common import generate_slug

    generated = []

    def not_yet_generated(candidate):
        # Accept only slugs that have not been produced earlier in this test.
        return candidate not in generated

    for _ in range(4):
        generated.append(generate_slug('foo', not_yet_generated))

    assert generated == ['foo', 'foo-1', 'foo-2', 'foo-3']
def test_diacritics():
    """Accented letters in the input come out as plain ASCII in the slug."""
    from mptracker.common import generate_slug

    accented = 'șțuâîâü'
    slug = generate_slug(accented)
    assert slug == 'stuaiau'
def get_people(
    year='2012',
    cache_name=None,
    throttle=None,
    no_commit=False,
    add_people=False,
):
    """Fetch the mandates of one term and record them in the database.

    Every mandate returned by ``MandateScraper.fetch(year)`` is matched to a
    ``Person`` by name, the person's first/last name are passed to the person
    patcher, and a mandate row is passed to the mandate patcher.

    Args:
        year: Term year; converted with ``int()`` to look up
            ``TERM_INTERVAL`` and passed as-is to the scraper.
        cache_name: Cache name for the HTTP session; when falsy the value of
            ``_get_config_cache_name()`` is used instead.
        throttle: When truthy, converted to ``float`` and passed to
            ``create_session``; otherwise passed through unchanged.
        no_commit: When true, the database session is rolled back at the end
            instead of committed.
        add_people: When true, a ``Person`` is created for any mandate whose
            person name has no match; when false such a mandate is an error.

    Raises:
        RuntimeError: If a person name has no match and ``add_people`` is
            false, or if the county name of a non-minority mandate has no
            match.
        AssertionError: If a mandate's ``chamber_number`` is not 2, or if the
            person patcher reports the person row as new.
    """
    from mptracker.scraper.people import MandateScraper

    http_session = create_session(
        cache_name=cache_name or _get_config_cache_name(),
        throttle=throttle and float(throttle),
    )
    mandate_scraper = MandateScraper(http_session)

    mandate_patcher = TablePatcher(
        models.Mandate,
        models.db.session,
        key_columns=['year', 'cdep_number'],
    )

    person_patcher = TablePatcher(
        models.Person,
        models.db.session,
        key_columns=['id'],
    )

    term_interval = TERM_INTERVAL[int(year)]
    new_people = 0
    chamber_by_slug = {c.slug: c for c in models.Chamber.query}

    with mandate_patcher.process() as add_mandate, \
            person_patcher.process() as add_person:
        for mandate in mandate_scraper.fetch(year):
            row = mandate.as_dict([
                'year',
                'cdep_number',
                'minority',
                'college',
                'constituency',
                'picture_url',
            ])
            assert mandate.chamber_number == 2
            row['chamber_id'] = chamber_by_slug['cdep'].id

            # Missing mandate dates fall back to the bounds of the term; an
            # open-ended term falls back to date.max.
            start_date = mandate.start_date or term_interval.lower
            end_date = mandate.end_date or term_interval.upper or date.max
            row['interval'] = DateRange(start_date, end_date)

            person = (
                models.Person.query
                .filter_by(name=mandate.person_name)
                .first())
            if person is None:
                if add_people:
                    # NOTE(review): generate_slug is called without a
                    # uniqueness validator here; confirm whether slug
                    # collisions between differently-named people need
                    # handling.
                    person = models.Person(
                        name=mandate.person_name,
                        slug=generate_slug(mandate.person_name),
                    )
                    models.db.session.add(person)
                    # Flush so that person.id is available below.
                    models.db.session.flush()
                    new_people += 1

                else:
                    raise RuntimeError("Can't find person named %r"
                                       % mandate.person_name)

            # The patcher call has a side effect, so it must not live inside
            # the assert expression: asserts are stripped under ``python -O``
            # and the update would silently be skipped.
            person_result = add_person({
                'id': person.id,
                'first_name': mandate.person_first_name,
                'last_name': mandate.person_last_name,
            })
            assert not person_result.is_new

            row['person_id'] = person.id

            if not mandate.minority:
                county = (
                    models.County.query
                    .filter_by(name=mandate.county_name)
                    .first())
                if county is None:
                    raise RuntimeError("Can't match county name %r"
                                       % mandate.county_name)
                row['county'] = county

            add_mandate(row)

    if new_people:
        logger.info("%d new people", new_people)

    if no_commit:
        # Logger.warn is a deprecated alias; use Logger.warning.
        logger.warning("Rolling back the transaction")
        models.db.session.rollback()

    else:
        models.db.session.commit()