def generate_person_long_names(engine):
    """Compute and store a fingerprint and slug for every person row.

    For each row of the ``person`` table a long name is built with
    ``make_long_name``. If ``match_speaker`` recognises that name, the
    matched name is used instead; otherwise the generated name is kept.
    The result is upserted as ``fingerprint`` together with its
    ``url_slug`` as ``slug`` (keyed on ``id``) and then registered via
    ``nk_persons().ensure_value``.

    Args:
        engine: Database handle passed through to the ``sl`` helpers.

    Raises:
        ValueError: Re-raised from ``ensure_value`` once it has failed more
            than ``max_retries`` times for the same person.
    """
    log.info("Generating person fingerprints and slugs...")
    # Imported at call time, as in the original code (presumably to avoid a
    # circular import at module load -- TODO confirm).
    from offenesparlament.transform.namematch import match_speaker

    max_retries = 5
    nkp = nk_persons()
    Person = sl.get_table(engine, 'person')
    for person in sl.find(engine, Person):
        long_name = make_long_name(person)
        try:
            long_name = match_speaker(long_name)
        except NKNoMatch:
            # Deliberate fallback: no match, keep the generated long name.
            pass
        log.info(" -> %s", long_name.strip())
        slug = url_slug(long_name)
        sl.upsert(engine, Person, {
            'fingerprint': long_name,
            'slug': slug,
            'id': person['id'],
        }, unique=['id'])

        # ensure_value may fail with ValueError; retry a bounded number of
        # times before giving up and propagating the last error.
        tries = 0
        while True:
            try:
                nkp.ensure_value(long_name, data=person)
            except ValueError as exc:
                log.warning('Exception: %s', exc)
                tries += 1
                if tries > max_retries:
                    raise
            else:
                break
def generate_person_long_names(db):
    """Assign fingerprints and slugs to persons and copy them onto roles.

    Works in three passes over ``db['person']``:

    1. Build each person's long name with ``make_long_name`` and write it
       back as ``fingerprint`` along with its ``url_slug`` as ``slug``,
       keyed on ``__id__``.
    2. Abort if any fingerprint value is reported with ``_count`` greater
       than one by ``Person.distinct('fingerprint')``.
    3. Write each person's fingerprint to ``db['rolle']``, keyed on
       ``mdb_id`` when the person has one, otherwise on
       ``person_source_url`` when the person has a ``source_url``.
       Persons with neither are skipped.

    Args:
        db: Mapping-like database handle providing the ``person`` and
            ``rolle`` tables.

    Raises:
        ValueError: If a fingerprint has a ``_count`` above one
            (presumably meaning it is shared by several person rows --
            TODO confirm against the ``distinct`` API).
    """
    log.info("Generating person fingerprints and slugs...")
    Person = db['person']
    for person in Person:
        long_name = make_long_name(person)
        log.debug(" -> %s", long_name.strip())
        slug = url_slug(long_name)
        Person.writerow({
            'fingerprint': long_name,
            'slug': slug,
            '__id__': person['__id__'],
        }, unique_columns=['__id__'])

    for fp in Person.distinct('fingerprint'):
        if fp['_count'] > 1:
            raise ValueError("Partial fingerprint: %s" % fp['fingerprint'])

    log.info("Updating 'rollen' to have fingerprints...")
    Rolle = db['rolle']
    for person in Person:
        if person['mdb_id']:
            Rolle.writerow({
                'mdb_id': person['mdb_id'],
                'fingerprint': person['fingerprint'],
            }, unique_columns=['mdb_id'])
        elif person['source_url']:
            # The person row carries the URL as 'source_url'; only the role
            # table names the column 'person_source_url'. The original read
            # person['person_source_url'], which is not the field this
            # branch just tested.
            Rolle.writerow({
                'person_source_url': person['source_url'],
                'fingerprint': person['fingerprint'],
            }, unique_columns=['person_source_url'])