def chanmsg(self, conn, user, channel, message):
    '''
    Handle one message sent to a trivia channel.

    The channel state object is looked up in ``self.channels`` by the pair
    ``(conn.factory.network, conn.irclower(channel))``.  The message is
    then interpreted, case-insensitively and ignoring surrounding
    whitespace, as one of:

    * ``!next``  -- calls ``ch.nobodygotit()``.
    * ``!stop``  -- clears ``ch.on`` and stops ``ch.timer``.
    * ``!start`` -- sets ``ch.on`` and calls ``ch.doquestion()``.
    * ``!hint``  -- unmasks some ``*`` positions of ``ch.hint`` using the
      letters of ``ch.answer`` and posts the hint.
    * anything else, while ``ch.on`` is true, is treated as a guess.

    A guess is compared to the answer after lower-casing and collapsing
    runs of whitespace.  An exact match posts a success line and moves to
    the next question; a near miss (normalised Damerau-Levenshtein distance
    of at most 0.2, or equal ``metaphone.dm`` results) posts a
    "That's close!" line.

    conn    -- connection object; must provide ``factory.network`` and
               ``irclower()``.
    user    -- sender in the form accepted by ``irc.usersplit``.
    channel -- channel name the message was sent to.
    message -- raw message text.
    '''
    ch = self.channels[conn.factory.network, conn.irclower(channel)]
    command = message.strip().lower()

    if command == "!next":
        ch.nobodygotit()
    elif command == "!stop":
        ch.on = False
        ch.timer.stop()
    elif command == "!start":
        ch.on = True
        ch.doquestion()
    elif command == "!hint":
        masked = [pos for pos, char in enumerate(ch.hint) if char == "*"]
        # Reveal at most three letters, and always leave at least three
        # positions masked so a hint never gives the answer away.
        reveal_count = min(3, len(masked) - 3)
        if reveal_count >= 1:
            for pos in random.sample(masked, reveal_count):
                ch.hint = ch.hint[:pos] + ch.answer[pos] + ch.hint[pos + 1:]
        # The current hint is posted even when nothing new was revealed.
        ch.post("Hint!!!!! \x037" + ch.hint)
    elif ch.on:
        # Normalise both sides: lower-case, trimmed, single-spaced.
        inp = ' '.join(command.split())
        ans = ' '.join(ch.answer.strip().lower().split())
        if inp == ans:
            ch.post("%s got the answer! %s"
                    % (irc.usersplit(user).group("nick"), ch.explanation))
            ch.doquestion()
        # float() forces true division: with integer operands Python 2
        # floors the ratio to 0, which made nearly every guess "close".
        # The ``ans and`` guard avoids dividing by zero on an empty answer.
        elif ans and float(
                dameraulevenshtein.dameraulevenshtein(inp, ans)) / len(ans) <= .2:
            ch.post(message + "? \x033That's close!")
            print("levenshtein")
        elif metaphone.dm(inp) == metaphone.dm(ans):
            ch.post(message + "? \x033That's close!")
            print("metaphone")
def calc_dists(mine, theirs):
    '''
    Compare two strings with three edit/similarity metrics.

    Both arguments are first coerced with ``unicode()``.  The result is a
    3-item tuple ``(lev, dl, jw)`` holding, in order, the Levenshtein
    distance, the Damerau-Levenshtein distance and the Jaro-Winkler
    distance as returned by ``distance``, ``dameraulevenshtein`` and
    ``jarow`` respectively.
    '''
    left, right = unicode(mine), unicode(theirs)
    return (
        distance(left, right),            # Levenshtein
        dameraulevenshtein(left, right),  # Damerau-Levenshtein
        jarow(left, right),               # Jaro-Winkler
    )
def rank_candidates():
    '''
    Yield a ``(distance, candidate)`` pair for each item of ``candidates``.

    The distance is ``dameraulevenshtein(candidate, word)``; both
    ``candidates`` and ``word`` are read from the surrounding scope.
    Pairs are produced lazily, in the iteration order of ``candidates``.
    '''
    for candidate in candidates:
        score = dameraulevenshtein(candidate, word)
        yield score, candidate
def find_item_by_dl(self, search):
    '''
    Rank every name from ``self.nodes_iter()`` by its distance to ``search``.

    Returns a list of ``(distance, name)`` tuples, where distance is
    ``dameraulevenshtein(search, name)``, sorted ascending (ties are
    ordered by name, as with normal tuple comparison).
    '''
    scored = (
        (dameraulevenshtein(search, node_name), node_name)
        for node_name in self.nodes_iter()
    )
    return sorted(scored)
# Maps every known raw product spelling (always lower-case) to its
# canonical product code.  Built once at import time rather than on every
# call to reconcile_product().
_PRODUCT_MAP = {
    u'anti-diphtheria serum': u'anti-diphtheria-serum',
    u'anti-diphtheria-serum': u'anti-diphtheria-serum',
    u'anti-scorpion venom': u'anti-scorpion-venom',
    u'anti-scorpion-venom': u'anti-scorpion-venom',
    u'anti-tetanus serum': u'anti-tetanus-serum',
    u'anti-tetanus-serum': u'anti-tetanus-serum',
    u'antivenin': u'snake-antivenom',
    u'bcg': u'bcg-20',
    u'bcg-10': u'bcg-20',
    u'bcg-20': u'bcg-20',
    u'bopv-20': u'bopv-20',
    u'bvpo': u'bopv-20',
    u'bvpo-20': u'bopv-20',
    u'cholera': u'cholera-2',
    u'cholera-1': u'cholera-1',
    u'cholera-2': u'cholera-2',
    u'dt-10': u'dt-10',
    u'dt-20': u'dt-20',
    u'dtc': u'dtp-20',
    u'dtc-10': u'dtp-20',
    u'dtc-hepb': u'dtp-hepb-10',
    u'dtc-hepb-10': u'dtp-hepb-10',
    u'dtc-hepb-2': u'dtp-hepb-2',
    u'dtc-hepb-hib': u'dtp-hepbhib-2',
    u'dtc-hepb-hib-1': u'dtp-hepbhib-1',
    u'dtc-hepb-hib-2': u'dtp-hepbhib-2',
    u'dtc-hib': u'dtp-hib-10',
    u'dtc-hib-1 (liquide)': u'dtp-hib-1-lqd',
    u'dtc-hib-1 (lyophilise)': u'dtp-hib-1-lph',
    u'dtc-hib-10': u'dtp-hib-10',
    u'dtp-10': u'dtp-10',
    u'dtp-20': u'dtp-20',
    u'dtp-hepb+hib-1': u'dtp-hepbhib-1',
    u'dtp-hepb+hib-2': u'dtp-hepbhib-2',
    u'dtp-hepb-10': u'dtp-hepb-10',
    u'dtp-hepb-2': u'dtp-hepb-2',
    u'dtp-hepb-hib-1': u'dtp-hepbhib-1',
    u'dtp-hepbhib-1': u'dtp-hepbhib-1',
    u'dtp-hepbhib-2': u'dtp-hepbhib-2',
    u'dtp-hib-1 (lph)': u'dtp-hib-1-lph',
    u'dtp-hib-1 (lqd)': u'dtp-hib-1-lqd',
    u'dtp-hib-1-lph': u'dtp-hib-1-lph',
    u'dtp-hib-1-lqd': u'dtp-hib-1-lqd',
    u'dtp-hib-10': u'dtp-hib-10',
    u'foetal bovine serum': u'foetal-bovine-serum',
    u'foetal-bovine-serum': u'foetal-bovine-serum',
    u'hepb': u'hepb-20',
    u'hepb adulte': u'hepb-10-adult',
    u'hepb-1': u'hepb-1',
    u'hepb-1 adult': u'hepb-1-adult',
    u'hepb-1 adulte': u'hepb-1-adult',
    u'hepb-1-adult': u'hepb-1-adult',
    u'hepb-10': u'hepb-10',
    u'hepb-10 adult': u'hepb-10-adult',
    u'hepb-10 adulte': u'hepb-10-adult',
    u'hepb-10-adult': u'hepb-10-adult',
    u'hepb-2': u'hepb-2',
    u'hepb-20': u'hepb-20',
    u'hepb-6': u'hepb-6',
    u'hib-1': u'hib-1',
    u'hib-1 (liquide)': u'hib-1-lqd',
    u'hib-1 (lph)': u'hib-1-lph',
    u'hib-1 (lqd)': u'hib-1-lqd',
    u'hib-1 (lyophilise)': u'hib-1-lph',
    u'hib-1-lph': u'hib-1-lph',
    u'hib-1-lqd': u'hib-1-lqd',
    u'hib-10': u'hib-10',
    u'influenza-1': u'influenza-1',
    u'influenza-10': u'influenza-10',
    u'ipv-1': u'ipv-1',
    u'ipv-10': u'ipv-10',
    u'mea-1': u'mea-1',
    u'mea-10': u'mea-10',
    u'mea-20': u'mea-20',
    u'measles': u'measles',
    u'mening a/c-10': u'mening-ac-10',
    u'mening a/c-50': u'mening-ac-50',
    u'mening ac&w-50': u'mening-acw-50',
    u'mening acyw135-1': u'mening-acyw135-1',
    u'mening acyw135-10': u'mening-acyw135-10',
    u'mening-ac-10': u'mening-ac-10',
    u'mening-ac-50': u'mening-ac-50',
    u'mening-acw-50': u'mening-acw-50',
    u'mening-acyw135-1': u'mening-acyw135-1',
    u'mening-acyw135-10': u'mening-acyw135-10',
    u'mmr-1': u'mmr-1',
    u'mmr-10': u'mmr-10',
    u'mmr-5': u'mmr-5',
    u'mopv1-20': u'mopv1-20',
    u'mopv3-20': u'mopv3-20',
    u'mr-10': u'mr-10',
    u'mr-2': u'mr-2',
    u'mumps': u'mumps',
    u'mvpo1': u'mopv1-20',
    u'mvpo1-20': u'mopv1-20',
    u'mvpo3': u'mopv3-20',
    u'mvpo3-20': u'mopv3-20',
    u'opv bulk': u'opv-bulk',
    u'opv-50': u'opv-50',
    u'opv-bulk': u'opv-bulk',
    u'pcv1-7': u'pcv1-7',
    u'pneumococcal serum': u'pneumococcal-serum',
    u'pneumococcal-serum': u'pneumococcal-serum',
    u'rabies': u'rabies',
    u'rabies hdc-1': u'rabies-hdc-1',
    u'rabies hri-1': u'rabies-hri-1',
    u'rabies-1': u'rabies-1',
    u'rabies-2': u'rabies-2',
    u'rabies-5': u'rabies-5',
    u'rabies-hdc-1': u'rabies-hdc-1',
    u'rabies-hri-1': u'rabies-hri-1',
    u'ror': u'mmr-10',
    u'ror-1': u'mmr-1',
    u'ror-10': u'mmr-10',
    u'ror-5': u'mmr-5',
    u'rubella-1': u'rubella-1',
    u'rubella-10': u'rubella-10',
    u'serum anti-diphterie': u'anti-diphtheria-serum',
    u'serum anti-scorpion': u'anti-scorpion-venom',
    u'serum anti-tetanique': u'anti-tetanus-serum',
    u'snake antivenom': u'snake-antivenom',
    u'snake-antivenom': u'snake-antivenom',
    u'td-10': u'td-10',
    u'td-20': u'td-20',
    u'topv-10': u'topv-10',
    u'topv-20': u'topv-20',
    u'tt uniject': u'tt-uniject',
    u'tt-10': u'tt-10',
    u'tt-20': u'tt-20',
    u'tt-uniject': u'tt-uniject',
    u'tuberculin/ppd': u'tuberculinppd',
    u'tuberculinppd': u'tuberculinppd',
    u'tvpo': u'topv-10',
    u'tvpo-10': u'topv-10',
    u'tvpo-20': u'topv-20',
    u'typhoid': u'typhoid',
    u'vaa': u'yf-50',
    u'vaa-1': u'yf-1',
    u'vaa-10': u'yf-10',
    u'vaa-20': u'yf-20',
    u'vaa-5': u'yf-5',
    u'vaa-50': u'yf-50',
    u'vam ac-10': u'mening-ac-10',
    u'vam ac-50': u'mening-ac-50',
    u'vam w135-1': u'mening-acyw135-1',
    u'vam w135-10': u'mening-acyw135-10',
    u'vam w50': u'mening-acw-50',
    u'vao': u'mumps',
    u'var': u'measles',
    u'var-1': u'mea-1',
    u'var-10': u'mea-10',
    u'var-20': u'mea-20',
    u'vat': u'tt-20',
    u'vpi-1': u'ipv-1',
    u'vpi-10': u'ipv-10',
    u'vpo': u'opv-50',
    u'vpo-50': u'opv-50',
    u'yf-1': u'yf-1',
    u'yf-10': u'yf-10',
    u'yf-20': u'yf-20',
    u'yf-5': u'yf-5',
    u'yf-50': u'yf-50',
}


def reconcile_product(raw_product, dl_threshold=None):
    '''
    Resolve a free-text product name to its canonical product code.

    raw_product  -- product name as entered; matched case-insensitively.
    dl_threshold -- largest Damerau-Levenshtein distance accepted for a
                    suggestion.  Defaults to half the length of
                    ``raw_product``, rounded up.

    Returns a 2-item tuple:

    * ``(True, code)`` when the lower-cased name is a known spelling.
    * ``(False, {"damerau-levenshtein": suggestions})`` otherwise, where
      ``suggestions`` is a list of
      ``(distance, raw_product, known_spelling, code)`` tuples with
      ``distance <= dl_threshold``, sorted by ascending distance.
    '''
    normalized = raw_product.lower()

    # Exact (case-insensitive) hit on a known spelling.
    if normalized in _PRODUCT_MAP:
        return True, _PRODUCT_MAP[normalized]

    if dl_threshold is None:
        dl_threshold = math.ceil(len(raw_product) / 2.0)

    # Compare against the lower-cased input: every known spelling is
    # lower-case, so using the raw text would count each upper-case letter
    # as an edit and disagree with the case-insensitive lookup above.
    dl_suggestions = []
    for term, code in _PRODUCT_MAP.items():
        dl = dameraulevenshtein(term, normalized)
        if dl <= dl_threshold:
            dl_suggestions.append((dl, raw_product, term, code))

    # Order by distance only; the sort is stable, so ties keep map order.
    dl_suggestions.sort(key=operator.itemgetter(0))
    return False, {"damerau-levenshtein": dl_suggestions}