Beispiel #1
0
 def chanmsg(self, conn, user, channel, message):
   """Handle one channel message: run a quiz command or judge it as a guess.

   The channel state is looked up in ``self.channels`` under the key
   ``(conn.factory.network, conn.irclower(channel))``.  The message is
   stripped and lower-cased, then dispatched:

   * ``!next``  -- calls ``ch.nobodygotit()``.
   * ``!stop``  -- clears ``ch.on`` and stops ``ch.timer``.
   * ``!start`` -- sets ``ch.on`` and calls ``ch.doquestion()``.
   * ``!hint``  -- unmasks up to three ``*`` positions of ``ch.hint`` (always
     leaving at least three masked) and posts the hint.
   * anything else, while ``ch.on`` is true, is treated as a guess and
     compared with ``ch.answer``.

   Raises KeyError if the channel has no entry in ``self.channels``.
   """
   ch = self.channels[conn.factory.network, conn.irclower(channel)]
   # Normalise once instead of re-stripping/lower-casing for every branch.
   command = message.strip().lower()
   if command == "!next":
     ch.nobodygotit()
   elif command == "!stop":
     ch.on = False
     ch.timer.stop()
   elif command == "!start":
     ch.on = True
     ch.doquestion()
   elif command == "!hint":
     masked = [i for i, c in enumerate(ch.hint) if c == "*"]
     # Reveal at most three characters and keep at least three hidden.
     reveal_count = min(3, len(masked) - 3)
     if reveal_count >= 1:
       for i in random.sample(masked, reveal_count):
         ch.hint = ch.hint[:i] + ch.answer[i] + ch.hint[i+1:]
     ch.post("Hint!!!!! \x037" + ch.hint)
   elif ch.on:
     # Collapse whitespace runs so spacing differences do not matter.
     inp = ' '.join(command.split())
     ans = ' '.join(ch.answer.strip().lower().split())
     if inp == ans:
       ch.post("%s got the answer!  %s" % (irc.usersplit(user).group("nick"), ch.explanation))
       ch.doquestion()
     # float() forces true division: with Python 2's integer "/" an integer
     # distance would floor to 0 whenever it is smaller than len(ans), so
     # nearly every guess would be reported as close.  The "ans and" guard
     # avoids a ZeroDivisionError when the answer is empty.
     elif ans and float(dameraulevenshtein.dameraulevenshtein(inp, ans)) / len(ans) <= .2:
       ch.post(message + "?  \x033That's close!")
       print("levenshtein")
     elif metaphone.dm(inp) == metaphone.dm(ans):
       ch.post(message + "?  \x033That's close!")
       print("metaphone")
def calc_dists(mine, theirs):
    ''' Compare two values as unicode strings with three metrics.

        Both arguments are converted with unicode() and passed, in the
        order (mine, theirs), to distance(), dameraulevenshtein() and
        jarow().

        Returns a 3-item tuple: (Levenshtein distance,
        Damerau-Levenshtein distance, Jaro-Winkler distance).
    '''
    left, right = unicode(mine), unicode(theirs)
    # Order matters: it fixes the position of each result in the tuple.
    metrics = (distance, dameraulevenshtein, jarow)
    return tuple(metric(left, right) for metric in metrics)
 def rank_candidates():
     """Yield a (distance, candidate) pair for each item in ``candidates``.

     The distance is dameraulevenshtein(candidate, word); ``candidates``
     and ``word`` come from the enclosing scope.  Pairs are yielded in
     iteration order, not sorted.
     """
     for candidate in candidates:
         score = dameraulevenshtein(candidate, word)
         yield score, candidate
Beispiel #4
0
	def find_item_by_dl(self, search):
		"""Rank every name from self.nodes_iter() by its distance to *search*.

		Returns a list of (distance, name) tuples sorted ascending, where
		distance is dameraulevenshtein(search, name); ties fall back to
		comparing the names, as in ordinary tuple ordering.
		"""
		return sorted(
			(dameraulevenshtein(search, name), name)
			for name in self.nodes_iter()
		)
Beispiel #5
0
# Known raw product spellings (lower-case) mapped to their canonical slug.
# Built once at import time rather than on every reconcile_product() call.
_PRODUCT_MAP = {
    u'anti-diphtheria serum': u'anti-diphtheria-serum',
    u'anti-diphtheria-serum': u'anti-diphtheria-serum',
    u'anti-scorpion venom': u'anti-scorpion-venom',
    u'anti-scorpion-venom': u'anti-scorpion-venom',
    u'anti-tetanus serum': u'anti-tetanus-serum',
    u'anti-tetanus-serum': u'anti-tetanus-serum',
    u'antivenin': u'snake-antivenom',
    u'bcg': u'bcg-20',
    u'bcg-10': u'bcg-20',
    u'bcg-20': u'bcg-20',
    u'bopv-20': u'bopv-20',
    u'bvpo': u'bopv-20',
    u'bvpo-20': u'bopv-20',
    u'cholera': u'cholera-2',
    u'cholera-1': u'cholera-1',
    u'cholera-2': u'cholera-2',
    u'dt-10': u'dt-10',
    u'dt-20': u'dt-20',
    u'dtc': u'dtp-20',
    u'dtc-10': u'dtp-20',
    u'dtc-hepb': u'dtp-hepb-10',
    u'dtc-hepb-10': u'dtp-hepb-10',
    u'dtc-hepb-2': u'dtp-hepb-2',
    u'dtc-hepb-hib': u'dtp-hepbhib-2',
    u'dtc-hepb-hib-1': u'dtp-hepbhib-1',
    u'dtc-hepb-hib-2': u'dtp-hepbhib-2',
    u'dtc-hib': u'dtp-hib-10',
    u'dtc-hib-1 (liquide)': u'dtp-hib-1-lqd',
    u'dtc-hib-1 (lyophilise)': u'dtp-hib-1-lph',
    u'dtc-hib-10': u'dtp-hib-10',
    u'dtp-10': u'dtp-10',
    u'dtp-20': u'dtp-20',
    u'dtp-hepb+hib-1': u'dtp-hepbhib-1',
    u'dtp-hepb+hib-2': u'dtp-hepbhib-2',
    u'dtp-hepb-10': u'dtp-hepb-10',
    u'dtp-hepb-2': u'dtp-hepb-2',
    u'dtp-hepb-hib-1': u'dtp-hepbhib-1',
    u'dtp-hepbhib-1': u'dtp-hepbhib-1',
    u'dtp-hepbhib-2': u'dtp-hepbhib-2',
    u'dtp-hib-1 (lph)': u'dtp-hib-1-lph',
    u'dtp-hib-1 (lqd)': u'dtp-hib-1-lqd',
    u'dtp-hib-1-lph': u'dtp-hib-1-lph',
    u'dtp-hib-1-lqd': u'dtp-hib-1-lqd',
    u'dtp-hib-10': u'dtp-hib-10',
    u'foetal bovine serum': u'foetal-bovine-serum',
    u'foetal-bovine-serum': u'foetal-bovine-serum',
    u'hepb': u'hepb-20',
    u'hepb adulte': u'hepb-10-adult',
    u'hepb-1': u'hepb-1',
    u'hepb-1 adult': u'hepb-1-adult',
    u'hepb-1 adulte': u'hepb-1-adult',
    u'hepb-1-adult': u'hepb-1-adult',
    u'hepb-10': u'hepb-10',
    u'hepb-10 adult': u'hepb-10-adult',
    u'hepb-10 adulte': u'hepb-10-adult',
    u'hepb-10-adult': u'hepb-10-adult',
    u'hepb-2': u'hepb-2',
    u'hepb-20': u'hepb-20',
    u'hepb-6': u'hepb-6',
    u'hib-1': u'hib-1',
    u'hib-1 (liquide)': u'hib-1-lqd',
    u'hib-1 (lph)': u'hib-1-lph',
    u'hib-1 (lqd)': u'hib-1-lqd',
    u'hib-1 (lyophilise)': u'hib-1-lph',
    u'hib-1-lph': u'hib-1-lph',
    u'hib-1-lqd': u'hib-1-lqd',
    u'hib-10': u'hib-10',
    u'influenza-1': u'influenza-1',
    u'influenza-10': u'influenza-10',
    u'ipv-1': u'ipv-1',
    u'ipv-10': u'ipv-10',
    u'mea-1': u'mea-1',
    u'mea-10': u'mea-10',
    u'mea-20': u'mea-20',
    u'measles': u'measles',
    u'mening a/c-10': u'mening-ac-10',
    u'mening a/c-50': u'mening-ac-50',
    u'mening ac&w-50': u'mening-acw-50',
    u'mening acyw135-1': u'mening-acyw135-1',
    u'mening acyw135-10': u'mening-acyw135-10',
    u'mening-ac-10': u'mening-ac-10',
    u'mening-ac-50': u'mening-ac-50',
    u'mening-acw-50': u'mening-acw-50',
    u'mening-acyw135-1': u'mening-acyw135-1',
    u'mening-acyw135-10': u'mening-acyw135-10',
    u'mmr-1': u'mmr-1',
    u'mmr-10': u'mmr-10',
    u'mmr-5': u'mmr-5',
    u'mopv1-20': u'mopv1-20',
    u'mopv3-20': u'mopv3-20',
    u'mr-10': u'mr-10',
    u'mr-2': u'mr-2',
    u'mumps': u'mumps',
    u'mvpo1': u'mopv1-20',
    u'mvpo1-20': u'mopv1-20',
    u'mvpo3': u'mopv3-20',
    u'mvpo3-20': u'mopv3-20',
    u'opv bulk': u'opv-bulk',
    u'opv-50': u'opv-50',
    u'opv-bulk': u'opv-bulk',
    u'pcv1-7': u'pcv1-7',
    u'pneumococcal serum': u'pneumococcal-serum',
    u'pneumococcal-serum': u'pneumococcal-serum',
    u'rabies': u'rabies',
    u'rabies hdc-1': u'rabies-hdc-1',
    u'rabies hri-1': u'rabies-hri-1',
    u'rabies-1': u'rabies-1',
    u'rabies-2': u'rabies-2',
    u'rabies-5': u'rabies-5',
    u'rabies-hdc-1': u'rabies-hdc-1',
    u'rabies-hri-1': u'rabies-hri-1',
    u'ror': u'mmr-10',
    u'ror-1': u'mmr-1',
    u'ror-10': u'mmr-10',
    u'ror-5': u'mmr-5',
    u'rubella-1': u'rubella-1',
    u'rubella-10': u'rubella-10',
    u'serum anti-diphterie': u'anti-diphtheria-serum',
    u'serum anti-scorpion': u'anti-scorpion-venom',
    u'serum anti-tetanique': u'anti-tetanus-serum',
    u'snake antivenom': u'snake-antivenom',
    u'snake-antivenom': u'snake-antivenom',
    u'td-10': u'td-10',
    u'td-20': u'td-20',
    u'topv-10': u'topv-10',
    u'topv-20': u'topv-20',
    u'tt uniject': u'tt-uniject',
    u'tt-10': u'tt-10',
    u'tt-20': u'tt-20',
    u'tt-uniject': u'tt-uniject',
    u'tuberculin/ppd': u'tuberculinppd',
    u'tuberculinppd': u'tuberculinppd',
    u'tvpo': u'topv-10',
    u'tvpo-10': u'topv-10',
    u'tvpo-20': u'topv-20',
    u'typhoid': u'typhoid',
    u'vaa': u'yf-50',
    u'vaa-1': u'yf-1',
    u'vaa-10': u'yf-10',
    u'vaa-20': u'yf-20',
    u'vaa-5': u'yf-5',
    u'vaa-50': u'yf-50',
    u'vam ac-10': u'mening-ac-10',
    u'vam ac-50': u'mening-ac-50',
    u'vam w135-1': u'mening-acyw135-1',
    u'vam w135-10': u'mening-acyw135-10',
    u'vam w50': u'mening-acw-50',
    u'vao': u'mumps',
    u'var': u'measles',
    u'var-1': u'mea-1',
    u'var-10': u'mea-10',
    u'var-20': u'mea-20',
    u'vat': u'tt-20',
    u'vpi-1': u'ipv-1',
    u'vpi-10': u'ipv-10',
    u'vpo': u'opv-50',
    u'vpo-50': u'opv-50',
    u'yf-1': u'yf-1',
    u'yf-10': u'yf-10',
    u'yf-20': u'yf-20',
    u'yf-5': u'yf-5',
    u'yf-50': u'yf-50',
}


def reconcile_product(raw_product, dl_threshold=None):
    """Map a raw product name to its canonical slug, or suggest near matches.

    Args:
        raw_product: product name as entered; matching is case-insensitive.
        dl_threshold: largest Damerau-Levenshtein distance accepted for a
            suggestion.  Defaults to half the length of ``raw_product``,
            rounded up.

    Returns:
        ``(True, slug)`` when the lower-cased name is a known spelling.
        Otherwise ``(False, {"damerau-levenshtein": suggestions})`` where
        ``suggestions`` is a list of
        ``(distance, raw_product, known_spelling, slug)`` tuples with
        ``distance <= dl_threshold``, ordered by distance and then by
        known spelling.
    """
    needle = raw_product.lower()

    # Exact (case-insensitive) hit: no fuzzy matching needed.
    if needle in _PRODUCT_MAP:
        return True, _PRODUCT_MAP[needle]

    # Compare the lower-cased input against every known spelling.  The keys
    # are all lower-case, so comparing the raw mixed-case input would count
    # every upper-case letter as an edit and inflate the distances.
    dl_dists = [
        (dameraulevenshtein(term, needle), raw_product, term, slug)
        for term, slug in _PRODUCT_MAP.items()
    ]

    # Sort by distance, breaking ties on the known spelling so the order
    # does not depend on dict iteration order.
    dl_dists.sort(key=operator.itemgetter(0, 2))

    if dl_threshold is None:
        # Default threshold: half the input length, rounded up.
        dl_threshold = math.ceil(len(raw_product) / 2.0)

    dl_suggestions = [d for d in dl_dists if d[0] <= dl_threshold]

    return False, {"damerau-levenshtein": dl_suggestions}