def match_beitrag(engine, beitrag):
    """Resolve the speaker of ``beitrag`` to a person fingerprint.

    The long name built by ``make_long_name`` is logged and handed to
    ``match_speaker``. If the ``person`` table has no row with the
    resulting fingerprint, ``make_person`` is called to create one.

    Returns the fingerprint produced by ``match_speaker``, or ``None``
    when ``NKNoMatch`` is raised while matching or creating the person.
    """
    # The return value is not used here; the call is kept because any
    # side effects of nk_persons() are not visible from this function.
    nk_persons()

    long_name = make_long_name(beitrag)
    log.info("Matching: %s", long_name)
    try:
        fingerprint = match_speaker(long_name)
        person_table = sl.get_table(engine, 'person')
        existing = sl.find_one(engine, person_table, fingerprint=fingerprint)
        if existing is None:
            make_person(beitrag, fingerprint, engine)
        return fingerprint
    except NKNoMatch:
        log.info("Beitrag person is unknown: %s", long_name)
        return None
def match_beitrag(db, master, beitrag, prints):
    """Map the speaker of ``beitrag`` onto a known fingerprint.

    Resolution order:

    1. No candidates in ``prints``: create a person via ``make_person``.
    2. A candidate at Levenshtein distance 0: return it.
    3. A stored mapping in ``master['name_match']`` (``dirty`` -> ``clean``):
       return the clean name, creating the person first if ``db['person']``
       has no row with that fingerprint.
    4. Otherwise ask the user via ``ask_user`` and remember the answer.

    Args:
        db: mapping-like object; ``db['person']`` must offer ``find_one``.
        master: mapping-like object; ``master['name_match']`` must offer
            ``find_one`` and ``writerow``.
        beitrag: the record whose speaker is being matched.
        prints: iterable of candidate fingerprints to compare against.

    Returns:
        The resolved fingerprint (or whatever ``make_person`` returns when
        a person is created), or ``None`` when ``ask_user`` raises
        ``ValueError`` or yields no result.
    """
    beitrag_print = make_long_name(beitrag)
    # Single-argument parenthesised form: prints the same bytes as the
    # Python 2 statement ``print "Matching:", ...`` and is valid syntax
    # on Python 3 as well.
    print("Matching: %s" % beitrag_print.encode('utf-8'))

    matches = [(p, levenshtein(p, beitrag_print)) for p in prints]
    # Closest candidates first. Avoids the Python-2-only tuple-parameter
    # lambda (``lambda (p, d): d``).
    matches = sorted(matches, key=lambda match: match[1])
    if not matches:
        # Nothing to compare against: create new.
        return make_person(beitrag, beitrag_print, db)

    first, dist = matches[0]
    if dist == 0:
        return first

    NameMatch = master['name_match']
    match = NameMatch.find_one(dirty=beitrag_print)
    if match is not None:
        clean = match.get('clean')
        Person = db['person']
        if Person.find_one(fingerprint=clean) is None:
            return make_person(beitrag, clean, db)
        return clean

    # Only ask_user's ValueError signals a skip; keeping writerow outside
    # the try stops a persistence error from being silently swallowed.
    try:
        user_res = ask_user(beitrag, beitrag_print, matches, db)
    except ValueError:
        return None

    # Never persist an empty answer: a stored ``clean: None`` would be
    # passed to make_person as the fingerprint on a later run.
    if user_res is not None:
        NameMatch.writerow({'dirty': beitrag_print, 'clean': user_res})
    return user_res
def ask_user(beitrag, beitrag_print, matches, db):
    """Interactively pick the fingerprint that belongs to ``beitrag_print``.

    Up to 20 candidates are listed with their index and distance, then one
    line is read from standard input:

    * blank line  -- accept the first candidate;
    * a number    -- select ``matches[number]``;
    * ``m``       -- show the next 20 candidates (indices restart at 0);
    * ``x``       -- not a speaker: raises ``ValueError``;
    * ``n``       -- create a new person via ``make_person`` (only when
                     ``beitrag`` is not ``None``; otherwise ``None`` is
                     returned).

    An out-of-range number, ``m`` with no further candidates, or any other
    input shows the prompt again instead of crashing with ``IndexError``
    or silently returning ``None``.

    Args:
        beitrag: record handed to ``make_person`` for the ``n`` choice.
        beitrag_print: the name being matched.
        matches: sequence of ``(fingerprint, distance)`` pairs.
        db: handle passed through to ``make_person``.

    Returns:
        The chosen fingerprint, the result of ``make_person``, or ``None``
        for ``n`` when ``beitrag`` is ``None``.

    Raises:
        ValueError: the user entered ``x``.
        EOFError: there are no candidates and standard input is exhausted,
            so no default can be chosen and no further answer can arrive.
    """
    page_size = 20
    while True:
        for i, (fp, dist) in enumerate(matches[:page_size]):
            m = " %s: %s (%s)" % (i, fp, dist)
            print(m.encode('utf-8'))
        sys.stdout.write("Enter choice or 'n' for new, 'x' for non-speaker [0]: ")
        sys.stdout.flush()
        line = sys.stdin.readline()
        choice = line.strip().lower()

        if not choice:
            if matches:
                return matches[0][0]
            if not line:
                # readline() returned '' (EOF): re-prompting would spin forever.
                raise EOFError("no candidates and no more input")
            continue

        try:
            idx = int(choice)
        except ValueError:
            pass
        else:
            try:
                ma, score = matches[idx]
            except IndexError:
                # Out-of-range choice: ask again rather than crash.
                continue
            return ma

        if choice == 'm':
            # Advance only when another page exists, so the candidate
            # list can never become empty through paging.
            if len(matches) > page_size:
                matches = matches[page_size:]
            continue
        if choice == 'x':
            raise ValueError()
        if choice == 'n':
            if beitrag is None:
                return None
            # Same output as the Python 2 statement ``print "CREATING", ...``.
            print("CREATING %s" % beitrag_print.encode("utf-8"))
            return make_person(beitrag, beitrag_print, db)
        # Anything else is unrecognised: fall through and prompt again.