예제 #1
0
def generate_person_long_names(engine):
    """Store a fingerprint and slug for every row of the 'person' table.

    For each person the long name from ``make_long_name`` is passed through
    ``match_speaker``; if that raises ``NKNoMatch`` the unmatched long name
    is kept. The resulting name is written back as ``fingerprint`` together
    with its ``url_slug`` (rows are keyed on ``id``), and is then registered
    via ``nkp.ensure_value``.

    :param engine: handle passed through to ``sl.get_table``, ``sl.find``
        and ``sl.upsert``.
    :raises ValueError: re-raised from ``nkp.ensure_value`` once it has
        failed six times in a row for the same person.
    """
    log.info("Generating person fingerprints and slugs...")
    # Function-scope import kept from the original (presumably to avoid an
    # import cycle -- TODO confirm).
    from offenesparlament.transform.namematch import match_speaker
    nkp = nk_persons()
    Person = sl.get_table(engine, 'person')
    # Number of retries after the first failed ensure_value call.
    max_retries = 5
    for person in sl.find(engine, Person):
        long_name = make_long_name(person)
        try:
            long_name = match_speaker(long_name)
        except NKNoMatch:
            # Deliberate best effort: fall back to the unmatched long name.
            pass
        log.info(" -> %s" % long_name.strip())
        slug = url_slug(long_name)
        sl.upsert(engine, Person, {
                         'fingerprint': long_name,
                         'slug': slug,
                         'id': person['id']},
                         unique=['id'])
        for attempt in range(max_retries + 1):
            try:
                nkp.ensure_value(long_name, data=person)
            except ValueError as exc:
                log.warn('Exception: %s' % str(exc))
                if attempt >= max_retries:
                    # Out of retries: propagate the last failure.
                    raise
            else:
                break
예제 #2
0
def generate_person_long_names(db):
    """Store person fingerprints and slugs, then copy them onto 'rolle'.

    First pass: every row of ``db['person']`` gets ``fingerprint`` (the
    result of ``make_long_name``) and ``slug`` (``url_slug`` of it), keyed
    on ``__id__``. Afterwards the distinct fingerprints are checked and a
    duplicate aborts the run.

    Second pass: each person's fingerprint is written to ``db['rolle']``,
    keyed on ``mdb_id`` when the person has one, otherwise on the person's
    source URL (stored in the ``person_source_url`` column).

    :param db: mapping-like handle giving access to the 'person' and
        'rolle' tables.
    :raises ValueError: if any fingerprint is shared by more than one
        person row.
    """
    log.info("Generating person fingerprints and slugs...")
    Person = db['person']
    for person in Person:
        long_name = make_long_name(person)
        log.debug(" -> %s" % long_name.strip())
        slug = url_slug(long_name)
        Person.writerow({'fingerprint': long_name,
                         'slug': slug,
                         '__id__': person['__id__']},
                         unique_columns=['__id__'])
    for fp in Person.distinct('fingerprint'):
        # A count above one means two people collapsed to the same name.
        if fp['_count'] > 1:
            raise ValueError("Partial fingerprint: %s" % fp['fingerprint'])

    log.info("Updating 'rollen' to have fingerprints...")
    Rolle = db['rolle']
    for person in Person:
        if person['mdb_id']:
            Rolle.writerow({
                'mdb_id': person['mdb_id'],
                'fingerprint': person['fingerprint']
                }, unique_columns=['mdb_id'])
        elif person['source_url']:
            # Read the same column the guard just tested; the original read
            # person['person_source_url'], which the guard never checked.
            Rolle.writerow({
                'person_source_url': person['source_url'],
                'fingerprint': person['fingerprint']
                }, unique_columns=['person_source_url'])