Beispiel #1
0
def match_beitrag(engine, beitrag):
    """Resolve the speaker of ``beitrag`` to a person fingerprint.

    The long name built by ``make_long_name(beitrag)`` is passed to
    ``match_speaker``.  If no row with the resulting fingerprint exists in
    the ``person`` table yet, one is created through ``make_person``.

    :param engine: database handle passed through to ``sl`` and
        ``make_person``.
    :param beitrag: the contribution record whose speaker is being matched.
    :returns: the value returned by ``match_speaker``, or ``None`` when
        ``NKNoMatch`` is raised.
    """
    # NOTE(review): the result of nk_persons() was never used in this
    # function.  The call itself is kept in case it has side effects (for
    # example initialising the matcher) -- confirm and drop if it does not.
    nk_persons()
    beitrag_print = make_long_name(beitrag)
    log.info("Matching: %s", beitrag_print)
    try:
        value = match_speaker(beitrag_print)
        person_table = sl.get_table(engine, 'person')
        if sl.find_one(engine, person_table, fingerprint=value) is None:
            # First time this fingerprint is seen: create the person row.
            make_person(beitrag, value, engine)
        return value
    except NKNoMatch:
        # ``except X:`` is valid on Python 2 and 3; the bound exception
        # object was unused.
        log.info("Beitrag person is unknown: %s", beitrag_print)
        return None
Beispiel #2
0
def match_beitrag(db, master, beitrag, prints):
    """Match the speaker of ``beitrag`` against known fingerprints.

    Resolution order:

    1. With no known fingerprints at all, a new person is created.
    2. An exact hit (Levenshtein distance 0) is returned directly.
    3. A previously stored ``dirty -> clean`` mapping in
       ``master['name_match']`` is reused; the person is created first if
       the clean fingerprint is not in ``db['person']`` yet.
    4. Otherwise the operator is asked via ``ask_user`` and the answer is
       stored as a new mapping.

    :param db: database handle providing the ``person`` table.
    :param master: database handle providing the ``name_match`` table.
    :param beitrag: the contribution record whose speaker is being matched.
    :param prints: iterable of known fingerprints to compare against.
    :returns: the matched or newly created fingerprint, or ``None`` when
        ``ask_user`` raises ``ValueError`` or yields no answer.
    """
    beitrag_print = make_long_name(beitrag)
    # Single-argument print(...) emits the same bytes on Python 2 as the
    # former two-argument print statement.
    print("Matching: %s" % (beitrag_print.encode('utf-8'),))

    # Candidates ordered by edit distance, closest first.
    matches = sorted(
        ((p, levenshtein(p, beitrag_print)) for p in prints),
        key=lambda match: match[1])
    if not matches:
        # Nothing to compare against: create a new person.
        return make_person(beitrag, beitrag_print, db)

    first, dist = matches[0]
    if dist == 0:
        return first

    NameMatch = master['name_match']
    match = NameMatch.find_one(dirty=beitrag_print)
    if match is not None:
        clean = match.get('clean')
        if db['person'].find_one(fingerprint=clean) is None:
            return make_person(beitrag, clean, db)
        return clean

    # Only ask_user's ValueError is treated as "no match"; a ValueError
    # from the write below is no longer swallowed along with it.
    try:
        user_res = ask_user(beitrag, beitrag_print, matches, db)
    except ValueError:
        return None

    if user_res is not None:
        # Never persist an empty answer: a stored ``clean=None`` mapping
        # would be picked up by step 3 on every later run.
        NameMatch.writerow({'dirty': beitrag_print, 'clean': user_res})
    return user_res
Beispiel #3
0
def ask_user(beitrag, beitrag_print, matches, db):
    """Interactively ask the operator to pick a candidate for a speaker.

    Up to 20 candidates are listed with their index and score, then one
    line is read from stdin:

    * blank line (or end of input) -- pick the first listed candidate;
    * a number -- pick the listed candidate with that index;
    * ``m`` -- show the next 20 candidates, if there are any;
    * ``x`` -- raise ``ValueError``;
    * ``n`` -- create a new person via ``make_person`` (only when
      ``beitrag`` is not ``None``).

    Any other input, including an index outside the listed range, causes
    the list and the prompt to be shown again.

    :param beitrag: contribution record handed to ``make_person``; may be
        ``None``, which disables the ``n`` choice.
    :param beitrag_print: long name of the speaker being matched.
    :param matches: sequence of ``(candidate, score)`` pairs.
    :param db: database handle handed to ``make_person``.
    :returns: the chosen candidate, or the result of ``make_person``.
    :raises ValueError: when the operator enters ``x``.
    :raises EOFError: when there are no candidates and stdin is exhausted.
    """
    page_size = 20
    while True:
        page = matches[:page_size]
        for i, (fp, dist) in enumerate(page):
            m = " %s: %s (%s)" % (i, fp, dist)
            print(m.encode('utf-8'))
        sys.stdout.write("Enter choice or 'n' for new, 'x' for non-speaker [0]: ")
        sys.stdout.flush()
        line = sys.stdin.readline()
        choice = line.strip().lower()

        if not choice:
            if page:
                # Default choice: the first listed candidate.
                return page[0][0]
            if not line:
                # readline() returned '' (end of input) and there is
                # nothing to default to; looping would never terminate.
                raise EOFError("no candidates to choose from and no input")
            continue

        try:
            idx = int(choice)
        except ValueError:
            idx = None
        if idx is not None:
            # Only indices that were actually listed are accepted, so a
            # typo cannot crash the session or select an unseen entry.
            if 0 <= idx < len(page):
                return page[idx][0]
            continue

        if choice == 'm':
            # Advance only when there is another page to show.
            if len(matches) > page_size:
                matches = matches[page_size:]
            continue
        if choice == 'x':
            raise ValueError()
        if choice == 'n' and beitrag is not None:
            print("CREATING %s" % (beitrag_print.encode("utf-8"),))
            return make_person(beitrag, beitrag_print, db)
        # Unrecognised input: fall through and ask again.