def generate_person_long_names(engine):
    """Recompute the fingerprint and slug of every row in the 'person' table.

    For each person a long name is built with ``make_long_name`` and passed
    through ``match_speaker``; when that raises ``NKNoMatch`` the generated
    long name is kept unchanged.  The row is then upserted (keyed on ``id``)
    with the resulting ``fingerprint`` and ``slug``, and the name is
    registered through ``nk_persons().ensure_value``.

    :param engine: database handle passed straight through to the ``sl``
        helpers (``get_table``, ``find``, ``upsert``).
    :raises ValueError: re-raised from ``ensure_value`` once it has failed
        more than five times for the same person.
    """
    log.info("Generating person fingerprints and slugs...")
    # Function-level import kept from the original code
    # (presumably avoids an import cycle -- TODO confirm).
    from offenesparlament.transform.namematch import match_speaker

    # Number of failed ``ensure_value`` calls tolerated per person before
    # the ValueError is allowed to propagate.
    max_failures = 5

    nkp = nk_persons()
    Person = sl.get_table(engine, 'person')
    for person in sl.find(engine, Person):
        long_name = make_long_name(person)
        try:
            long_name = match_speaker(long_name)
        except NKNoMatch:
            # No match available: fall back to the generated long name.
            pass
        log.info(" -> %s", long_name.strip())
        slug = url_slug(long_name)
        sl.upsert(engine, Person, {
            'fingerprint': long_name,
            'slug': slug,
            'id': person['id'],
        }, unique=['id'])

        # Retry immediately on ValueError; give up (re-raise) after more
        # than ``max_failures`` failures for this person.
        tries = 0
        while True:
            try:
                nkp.ensure_value(long_name, data=person)
            except ValueError as exc:
                log.warn('Exception: %s', exc)
                tries += 1
                if tries > max_failures:
                    raise
            else:
                break
def match_speaker(speaker):
    """Look up *speaker* through ``nk_persons().lookup`` unless it is cached.

    NOTE(review): only this much of the function is visible here.  As shown,
    the lookup result (or the caught exception object) is bound to ``obj``
    but is neither stored in ``_SPEAKER_CACHE`` nor returned -- the rest of
    the function is presumably missing from this view; confirm against the
    full file before relying on this description.
    """
    nkp = nk_persons()
    # Only query when the name is not already a key of _SPEAKER_CACHE.
    if speaker not in _SPEAKER_CACHE:
        try:
            obj = nkp.lookup(speaker)
        except NKInvalid, inv:
            # The exception object itself becomes the result instead of
            # propagating to the caller at this point.
            obj = inv
        except NKNoMatch, nm:
            # Same treatment for "no match": keep the exception as the result.
            obj = nm
def match_beitrag(engine, beitrag):
    """Resolve the person named in *beitrag* and make sure a row exists.

    The long name built by ``make_long_name`` is passed to ``match_speaker``.
    If no row in the 'person' table has the resulting value as its
    ``fingerprint``, ``make_person`` is called to create one.

    :param engine: database handle passed through to the ``sl`` helpers.
    :param beitrag: record describing the person; forwarded to
        ``make_long_name`` and ``make_person``.
    :returns: the value returned by ``match_speaker``, or ``None`` when
        ``NKNoMatch`` is raised.
    """
    beitrag_print = make_long_name(beitrag)
    log.info("Matching: %s", beitrag_print)
    try:
        value = match_speaker(beitrag_print)
        person_table = sl.get_table(engine, 'person')
        if sl.find_one(engine, person_table, fingerprint=value) is None:
            # Fingerprint not yet in the database: create the person row.
            make_person(beitrag, value, engine)
        return value
    except NKNoMatch:
        log.info("Beitrag person is unknown: %s", beitrag_print)
        return None
def make_person(beitrag, fp, engine):
    """Create or update the 'person' row for fingerprint *fp*.

    *fp* is first passed through ``match_speaker``.  The person record is
    assembled from *beitrag* (``vorname`` and ``nachname`` are required
    keys; ``ort``, ``ressort``, ``land`` and ``fraktion`` are read with
    ``.get``), upserted keyed on ``fingerprint``, and registered through
    ``nk_persons().ensure_value``.

    If ``NKNoMatch`` is raised at any point inside the guarded section it
    is swallowed and whatever value ``fp`` holds at that moment is returned.

    :returns: the (possibly re-matched) fingerprint.
    """
    # Function-level import kept from the original code
    # (presumably avoids an import cycle -- TODO confirm).
    from offenesparlament.transform.namematch import match_speaker

    persons_dataset = nk_persons()
    try:
        fp = match_speaker(fp)

        record = {'fingerprint': fp}
        # Mandatory fields: a missing key raises KeyError.
        for required_key in ('vorname', 'nachname'):
            record[required_key] = beitrag[required_key]
        # Optional fields: absent keys are stored as None.
        for optional_key in ('ort', 'ressort', 'land', 'fraktion'):
            record[optional_key] = beitrag.get(optional_key)

        person_table = sl.get_table(engine, 'person')
        sl.upsert(engine, person_table, record, unique=['fingerprint'])
        persons_dataset.ensure_value(fp, data=record)
    except NKNoMatch:
        pass
    return fp