Esempio n. 1
0
 def chanmsg(self, conn, user, channel, message):
   """Handle one channel message: run a quiz command or judge it as a guess.

   The commands ``!next``, ``!stop``, ``!start`` and ``!hint`` are matched
   case-insensitively with surrounding whitespace ignored.  Any other
   message is compared against the current answer while the channel's
   quiz is switched on.
   """
   ch = self.channels[conn.factory.network, conn.irclower(channel)]
   command = message.strip().lower()
   if command == "!next":
     ch.nobodygotit()
   elif command == "!stop":
     ch.on = False
     ch.timer.stop()
   elif command == "!start":
     ch.on = True
     ch.doquestion()
   elif command == "!hint":
     # Reveal up to three masked characters, but always leave at least
     # three "*" in the hint.
     stars = [i for i, c in enumerate(ch.hint) if c == "*"]
     n = min(3, len(stars)-3)
     if n >= 1:
       for i in random.sample(stars, n):
         ch.hint = ch.hint[:i] + ch.answer[i] + ch.hint[i+1:]
     ch.post("Hint!!!!! \x037" + ch.hint)
   elif ch.on:
     # Compare case-insensitively with runs of whitespace collapsed.
     inp = ' '.join(command.split())
     ans = ' '.join(ch.answer.strip().lower().split())
     if inp == ans:
       ch.post("%s got the answer!  %s" % (irc.usersplit(user).group("nick"), ch.explanation))
       ch.doquestion()
     # float() forces true division: with integer division the ratio was 0
     # for any distance shorter than the answer, so nearly every guess was
     # reported as close.  An empty answer skips the ratio (no division by
     # zero) and falls through to the phonetic comparison.
     elif ans and dameraulevenshtein.dameraulevenshtein(inp, ans) / float(len(ans)) <= .2:
       ch.post(message + "?  \x033That's close!")
       print("levenshtein")
     elif metaphone.dm(inp) == metaphone.dm(ans):
       ch.post(message + "?  \x033That's close!")
       print("metaphone")
Esempio n. 2
0
def __get_values(params):
    """Flatten a nested voter record into a positional list of column values.

    *params* is read through its 'name', 'voter_info', 'contact_info' and
    'address' sub-mappings.  The primary metaphone (first element of the
    ``dm`` result) of the last name, first name, nickname and street name
    is included alongside the raw fields.
    """
    name = params['name']
    row = [name[key] for key in ('last', 'first', 'middle', 'suffix', 'nickname')]
    row.extend(dm(name[key])[0] for key in ('last', 'first', 'nickname'))

    voter = params['voter_info']
    row.extend(voter[key] for key in ('birth_year', 'gender'))

    contact = params['contact_info']
    row.extend(contact[key] for key in ('email', 'phone1', 'phone2'))

    address = params['address']
    row.extend(address[key] for key in (
        'house_number', 'pre_direction', 'street_name',
        'street_type', 'suf_direction', 'unit',
    ))
    row.append(dm(address['street_name'])[0])
    row.extend(address[key] for key in ('city', 'zipcode'))

    row.extend(voter[key] for key in ('precinct_id', 'voter_id', 'reg_date'))
    return row
Esempio n. 3
0
def _metaphones(query, dataset):
    """Replace every keyword by its primary metaphone code.

    Returns ``(codes_for_query, codes_per_dataset_entry)`` where the second
    element holds one list of codes for each entry of *dataset*.
    """
    def primary_codes(keywords):
        # dm() returns a pair; only its first element is kept
        return [dm(keyword)[0] for keyword in keywords]

    return primary_codes(query), [primary_codes(entry) for entry in dataset]
Esempio n. 4
0
 def compute_similarity_matching_fields(self):
   """Derive the fuzzy-matching fields from name, address, city and phone1.

   Metaphone fields hold both double-metaphone codes joined as 'X-Y';
   digit fields hold the value passed through _filter_non_digits.  A
   derived field is None when its source attribute is empty.
   """
   if self.name:
     self.name_metaphone = '%s-%s' % metaphone.dm(unicode(self.name))
   else:
     self.name_metaphone = None

   if self.address:
     self.address_digits = _filter_non_digits(self.address)
   else:
     self.address_digits = None

   if self.address:
     self.address_metaphone = '%s-%s' % metaphone.dm(unicode(self.address))
   else:
     self.address_metaphone = None

   if self.city:
     self.city_metaphone = '%s-%s' % metaphone.dm(unicode(self.city))
   else:
     self.city_metaphone = None

   if self.phone1:
     self.phone_normalised = _filter_non_digits(self.phone1)
   else:
     self.phone_normalised = None
Esempio n. 5
0
def palabras_sim(palabras1, palabras2):
    """Sum the phonetic similarity of the two phrases, word by word.

    Both phrases are normalised with module2.Sortable, encoded as latin-1
    and split on single spaces.  Words are compared position by position;
    surplus words of the longer phrase are ignored.
    """
    left_words = module2.Sortable(palabras1).encode('latin_1').split(' ')
    right_words = module2.Sortable(palabras2).encode('latin_1').split(' ')
    total = 0
    # zip() stops at the shorter phrase
    for left, right in zip(left_words, right_words):
        total += similaridad(dm(left), dm(right))
    return total
Esempio n. 6
0
 def greet_engine(self):
     """Drop everything up to and including the first word that sounds like the assistant's name.

     Each entry of self.raw_text_array is compared with the configured
     assistant name by the ratio of their primary metaphone codes; the
     first entry scoring above 0.7 ends the search.  If nothing matches,
     the array is left untouched.
     """
     # NOTE(review): `sm` is presumably difflib.SequenceMatcher - confirm at the import site
     name_code = dm(self.c.config.get('SYSTEM', 'assistant_name'))[0]
     for position, spoken in enumerate(self.raw_text_array):
         similarity = sm(None, name_code, dm(spoken)[0]).ratio()
         if similarity > 0.7:
             self.raw_text_array = self.raw_text_array[position + 1:]
             break
 def computePhonemic(self, word):
     """Return the double-metaphone result for *word*, trying three decodings.

     Attempts, in order: unicode(word), word decoded as UTF-8, word decoded
     as latin-1 (undecodable bytes ignored in both decodes).  Only a
     failure of the last attempt propagates.
     """
     try:
         return metaphone.dm(unicode(word))
     except Exception:
         pass
     try:
         return metaphone.dm(word.decode('utf-8', 'ignore'))
     except Exception:
         pass
     return metaphone.dm(word.decode('latin-1', 'ignore'))
Esempio n. 8
0
 def get_metaphone_level(self, first, second):
     """Grade how closely two strings match phonetically.

     Returns 4 when the primary metaphone codes are equal, 2 when the
     secondary code of one equals the primary code of the other, 1 when
     both secondary codes are equal, and 0 otherwise.

     The checks are ranked so a weaker match can never overwrite a
     stronger one, and missing (empty/None) secondary codes never count
     as a match.  Previously two words without secondary codes compared
     as None == None and were always graded 1.
     """
     (first_pri, first_sec) = metaphone.dm(first)
     (second_pri, second_sec) = metaphone.dm(second)
     if first_pri == second_pri:
         return 4
     if (first_sec and first_sec == second_pri) or (second_sec and first_pri == second_sec):
         return 2
     if first_sec and first_sec == second_sec:
         return 1
     return 0
Esempio n. 9
0
 def get_metaphone_level(self, first, second):
     """Grade how closely two strings match phonetically.

     Returns 4 when the primary metaphone codes are equal, 2 when the
     secondary code of one equals the primary code of the other, 1 when
     both secondary codes are equal, and 0 otherwise.

     The checks are ranked so a weaker match can never overwrite a
     stronger one, and missing (empty/None) secondary codes never count
     as a match.  Previously two words without secondary codes compared
     as None == None and were always graded 1.
     """
     (first_pri, first_sec) = metaphone.dm(first)
     (second_pri, second_sec) = metaphone.dm(second)
     if first_pri == second_pri:
         return 4
     if (first_sec and first_sec == second_pri) or (second_sec and first_pri == second_sec):
         return 2
     if first_sec and first_sec == second_sec:
         return 1
     return 0
    def closest_by_sound(klass, search_string, similarity_threshold = 0.8):
        """Find objects of *klass* whose name sounds like *search_string*.

        For every object the longest token of its upper-cased name (dots
        treated as spaces) is taken as the surname guess, and its primary
        metaphone is compared with the metaphones of the search string.

        Returns the exact phonetic matches when there are any.  Otherwise
        returns the matches whose similarity score (from ``jarow``) reaches
        *similarity_threshold*, sorted by ascending score.  Each match is
        an ``(obj, obj_metaphones, score)`` tuple.
        """
        same = []
        similar = []

        # metaphones of search term
        search_sound = dm(search_string)

        for obj in klass.objects.all():
            name_list = str(obj.name).upper().replace('.', ' ').split()
            if not name_list:
                # blank name: nothing to compare (max() would raise ValueError)
                continue
            surname_guess = max(name_list, key=len)

            # metaphones of obj name
            obj_sound = dm(surname_guess)

            if search_sound[0] == obj_sound[0]:
                # primary metaphones match exactly
                same.append((obj, obj_sound, 1.0))
                continue

            if search_sound[1] is not None and search_sound[1] == obj_sound[0]:
                # secondary metaphone of the search term matches the
                # primary metaphone of obj
                same.append((obj, obj_sound, 1.0))
                continue

            # no exact match, so see if the primary metaphones are similar
            primary_sound_dist = jarow(str(search_sound[0]), str(obj_sound[0]))
            if primary_sound_dist >= similarity_threshold:
                similar.append((obj, obj_sound, primary_sound_dist))
                continue

            if search_sound[1] is not None:
                # still no good match: see if the secondary metaphone of
                # the search term is similar to obj
                secondary_sound_dist = jarow(str(search_sound[1]), str(obj_sound[0]))
                if secondary_sound_dist >= similarity_threshold:
                    similar.append((obj, obj_sound, secondary_sound_dist))

        if same:
            return same

        # keyword form works on Python 2 and 3; the positional (cmp, key)
        # form is rejected by Python 3
        similar.sort(key=operator.itemgetter(2))

        # with more than 5 similar matches keep only those scoring at
        # least the average
        if len(similar) > 5:
            avg_jaro = sum((x[2] for x in similar), 0.0) / len(similar)
            above_avg_jaro = [x for x in similar if (x[2] >= avg_jaro)]
            # NOTE(review): this path returns a (search_sound, matches)
            # tuple while every other path returns a plain list; kept
            # as-is because callers may depend on it - confirm and unify.
            return search_sound, above_avg_jaro

        return similar
Esempio n. 11
0
 def compute_similarity_matching_fields(self):
     """Fill in the derived fields used for similarity matching.

     name, address and city each get a '<primary>-<secondary>' double
     metaphone string; address and phone1 also get a version run through
     _filter_non_digits.  Empty sources produce None.
     """
     def metaphone_pair(text):
         # both double-metaphone codes joined as 'X-Y'
         return '%s-%s' % metaphone.dm(unicode(text))

     self.name_metaphone = None if not self.name else metaphone_pair(self.name)
     self.address_digits = None if not self.address else _filter_non_digits(self.address)
     self.address_metaphone = None if not self.address else metaphone_pair(self.address)
     self.city_metaphone = None if not self.city else metaphone_pair(self.city)
     self.phone_normalised = None if not self.phone1 else _filter_non_digits(self.phone1)
Esempio n. 12
0
def palabras_sim(palabras1, palabras2):
    """Sum the phonetic similarity of every word pair across the two phrases.

    Both phrases are normalised with module2.Sortable, encoded as latin-1
    and split on single spaces.  Every word of the first phrase is compared
    with every word of the second; words whose lower-cased form is in
    palabras_no_consideradas are left out on either side.
    """
    left_words = module2.Sortable(palabras1).encode('latin_1').split(' ')
    right_words = module2.Sortable(palabras2).encode('latin_1').split(' ')
    total = 0
    for left in left_words:
        if left.lower() in palabras_no_consideradas:
            continue
        for right in right_words:
            if right.lower() in palabras_no_consideradas:
                continue
            total += similaridad(dm(left), dm(right))
    return total
Esempio n. 13
0
def plausibleWords(incorrectWord):
    """Return dictionary words whose primary metaphone equals that of *incorrectWord*.

    The phonetic files hold one code per line and are matched by line
    number against the word lists ("metaphonicDictUS.txt" with "enUS.txt",
    "metaphonicDictGB.txt" with "enGB.txt").  US matches come first, then
    GB matches, each in file order.

    Raises IndexError when a word list has fewer lines than a matching
    line number in its phonetic file.
    """
    target = metaphone.dm(incorrectWord)[0]

    def matches(phonetic_path, words_path):
        # Files are closed by the with-blocks.  rstrip("\n") removes only a
        # real newline, so a final line without one keeps its last character.
        with open(phonetic_path, "r") as phonetic_file:
            hits = [number for number, line in enumerate(phonetic_file)
                    if line.rstrip("\n") == target]
        with open(words_path, "r") as words_file:
            words = words_file.readlines()
        return [words[number].rstrip("\n") for number in hits]

    return (matches("metaphonicDictUS.txt", "enUS.txt")
            + matches("metaphonicDictGB.txt", "enGB.txt"))
Esempio n. 14
0
def plausibleWords(incorrectWord):
    """Return dictionary words whose primary metaphone equals that of *incorrectWord*.

    The phonetic files hold one code per line and are matched by line
    number against the word lists ("metaphonicDictUS.txt" with "enUS.txt",
    "metaphonicDictGB.txt" with "enGB.txt").  US matches come first, then
    GB matches, each in file order.

    Raises IndexError when a word list has fewer lines than a matching
    line number in its phonetic file.
    """
    target = metaphone.dm(incorrectWord)[0]

    def matches(phonetic_path, words_path):
        # Files are closed by the with-blocks.  rstrip("\n") removes only a
        # real newline, so a final line without one keeps its last character.
        with open(phonetic_path, "r") as phonetic_file:
            hits = [number for number, line in enumerate(phonetic_file)
                    if line.rstrip("\n") == target]
        with open(words_path, "r") as words_file:
            words = words_file.readlines()
        return [words[number].rstrip("\n") for number in hits]

    return (matches("metaphonicDictUS.txt", "enUS.txt")
            + matches("metaphonicDictGB.txt", "enGB.txt"))
Esempio n. 15
0
def sort_text(raw_text):
    """Split a spoken command into reserved words and remaining key words.

    The text is lower-cased and split on whitespace.  Everything up to and
    including the first word whose primary metaphone resembles that of
    CONFIG['assistant_name'] (ratio above 0.7) is discarded.  Returns
    ``(sub_words, key_words)``: the words found in RESERVED_WORDS and the
    rest, both in their original order.
    """
    # NOTE(review): `sm` is presumably difflib.SequenceMatcher - confirm at the import site
    words = raw_text.lower().split()
    name_code = dm(CONFIG['assistant_name'])[0]
    for position, word in enumerate(words):
        if sm(None, name_code, dm(word)[0]).ratio() > 0.7:
            words = words[position + 1:]
            break

    sub_words = [word for word in words if word in RESERVED_WORDS]
    key_words = [word for word in words if word not in RESERVED_WORDS]
    return sub_words, key_words
Esempio n. 16
0
    def add_item(self, key, value):
        """Index *value* under *key* in the literal, word, initial-letter and phonetic tables."""
        normalized = self.normalize_key(key)
        self.literal.setdefault(normalized, []).append(value)

        for token in normalized.split():
            self.words.setdefault(token, []).append(normalized)
            self.alpha_words.setdefault(token[0], []).append(token)

            codes = metaphone.dm(token)
            self.phonetic_words.setdefault(codes[0], []).append(normalized)
            # the secondary code is indexed only when it is non-empty
            secondary = codes[1]
            if secondary:
                self.phonetic_words.setdefault(secondary, []).append(normalized)
Esempio n. 17
0
def get_results(misspelt_word,prior_frequencies,ngram_words,matrices,phonetic):
    """Rank correction candidates for *misspelt_word* and print the best five.

    Candidates pruned by similarity_prune are loaded into a trie and
    searched.  Each hit's score is multiplied by the word's prior
    frequency and by its phonetic score against the misspelt word.
    """
    candidates = similarity_prune(ngram_words, misspelt_word, NGRAM_N)
    misspelt_codes = metaphone.dm(misspelt_word)

    trie = TrieNode()
    for candidate in candidates:
        trie.insert(candidate)

    scored = []
    for hit in search(misspelt_word, matrices, trie):
        scored.append((
            hit[0],
            hit[1],
            hit[2] * prior_frequencies[hit[0]] * phonetic_score(misspelt_codes, phonetic[hit[0]]),
        ))

    # highest combined score first
    scored.sort(key=lambda entry: entry[2], reverse=True)
    print_words_from_list(misspelt_word, scored[:5])
Esempio n. 18
0
def insert_statement(d):
    """Build the ``INSERT INTO voters`` statement for one voter record.

    *d* maps the source column names to string values.  The column list
    comes from the module-level ``fldnames``.  ``reg_date`` is rearranged
    from its first two / next two / remaining characters into
    "rest-first-second".  The street metaphone is computed on the street
    name plus the street type, with the type expanded through
    ``street_abbrs`` when it is listed there.

    Text values are wrapped in double quotes, with embedded double quotes
    doubled so a value such as ``O"Neil`` cannot terminate the literal
    early.  Values without quotes produce exactly the same statement as
    before.
    """
    def quoted(value):
        return '"' + value.replace('"', '""') + '"'

    precinct_id = str(get_precinct(d))
    t = d['street_type']
    if t in street_abbrs:
        t = street_abbrs[t]
    reg_date = '"%s-%s-%s"' % (
        d['reg_date'][4:].strip(), d['reg_date'][0:2], d['reg_date'][2:4])
    # NOTE(review): birth_year, house_number and precinct_id are emitted
    # unquoted and unvalidated; prefer a parameterized query if this data
    # can be untrusted.
    flds = [
        quoted(d['last_name']),
        quoted(d['first_name']),
        quoted(d['middle_name']),
        quoted(d['name_suffix']),
        quoted(dm(d['last_name'])[0]),
        quoted(dm(d['first_name'])[0]),
        d['birth_year'],
        quoted(d['gender']),
        d['house_number'] or 'null',
        quoted(d['pre_direction']),
        quoted(d['street_name']),
        quoted(d['street_type']),
        quoted(d['suf_direction']),
        quoted(d['unit']),
        quoted(dm(d['street_name'] + ' ' + t)[0]),
        quoted(d['city']),
        quoted(d['zipcode']),
        precinct_id,
        quoted(d['voter_id']),
        reg_date,
        quoted(d['permanent_absentee']),
        quoted(d['status']),
        quoted(d['uocava']),
    ]
    return ("INSERT INTO voters "
            "(%s) "
            "VALUES (%s);\n") % (
        ','.join(fldnames),
        ','.join(flds)
    )
Esempio n. 19
0
def getPhoneticComparison(s1, s2):
    """
    Return the phonetic similarity of two strings as a float in [0., 1.].

    1. means the primary metaphone codes are equal.  Otherwise the result
    is 0.9 minus the Levenshtein distance between the codes divided by
    their mean length, floored at 0.  If the metaphone computation fails,
    the raw strings are compared instead.
    """
    import metaphone
    try:
        meta1 = metaphone.dm(s1)[0]
        meta2 = metaphone.dm(s2)[0]
    except Exception as err:
        # Exception rather than BaseException, so KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        print("ERR: can't metaphone '%s' or '%s': err: %s" % (s1, s2, err))
        meta1 = s1
        meta2 = s2
    if meta1 == meta2:
        return 1.

    # 2.0 keeps the mean a float on Python 2 as well; integer division
    # truncated it there and inflated the distance penalty.
    rMidLen = (len(meta1) + len(meta2)) / 2.0
    if (rMidLen < 1):
        return 0.
    rDist = 0.9 - levenshtein(meta1, meta2) / rMidLen
    if (rDist < 0.):
        rDist = 0.
    return rDist
Esempio n. 20
0
def preprocessing():
    """Load the dictionary, word priors and edit matrices used by the spell checker.

    Returns ``(prior_frequencies, ngram_words, matrices, words, phonetic)``:

    * prior_frequencies - word -> count divided by the total of all counts
      read from 'data/count_1w.txt'; dictionary words missing from that
      file keep an add-one count of 1.
    * ngram_words - result of ngram_index_structure(words, NGRAM_N).
    * matrices - the first five files of the list parsed as lists of float
      rows, followed by the last file parsed as a flat list of floats.
    * words - dictionary words in file order.
    * phonetic - word -> double-metaphone result.

    Raises ZeroDivisionError when no dictionary word has a count in
    'data/count_1w.txt'.
    """
    words = []
    prior_frequencies = {}
    total_frequencies = 0
    matrices = []
    phonetic = {}

    # Reading dictionary
    with open('data/unixdict.txt') as f:
        for line in f.read().splitlines():
            word = line.split('\t')[0]
            words.append(word)
            phonetic[word] = metaphone.dm(word)
            prior_frequencies[word] = 1 # Doing add one

    # Reading priors
    with open('data/count_1w.txt') as f:
        for line in f.read().splitlines():
            fields = line.split('\t')
            if len(fields) < 2:
                # blank or malformed line: no count to read
                continue
            word = fields[0]
            if word in prior_frequencies:
                freq = int(fields[1])
                prior_frequencies[word] = freq
                total_frequencies += freq

    # Divide by total frequency to get probability
    # (.items() works on both Python 2 and 3, unlike .iteritems())
    prior_frequencies = {k: v / float(total_frequencies)
                         for k, v in prior_frequencies.items()}
    ngram_words = ngram_index_structure(words, NGRAM_N)

    # Load matrices; open() in a with-block closes each file, which the
    # bare file(...) calls never did
    files = ['data/addoneAddXY.txt', 'data/addoneSubXY.txt', 'data/addoneDelXY.txt', 'data/newCharsXY.txt', 'data/addoneRevXY.txt', 'data/sumnewCharsXY.txt']
    for path in files[:-1]:
        with open(path) as matrix_file:
            matrices.append([[float(x) for x in row.split()] for row in matrix_file])
    # Last one is a vector, not a matrix
    with open(files[-1]) as vector_file:
        matrices.append([float(row) for row in vector_file])

    return (prior_frequencies,ngram_words,matrices,words,phonetic)
def phonetic_normalizer(s):
    """Return the primary metaphone of *s* after normalisation.

    The string is lower-cased, stripped of everything matched by
    NOT_ALPHANUMSPACE_RE and split on single spaces.  The words are then
    sorted, those listed in SUFFIXES are dropped, and the rest are
    concatenated without separators before encoding.
    """
    cleaned = NOT_ALPHANUMSPACE_RE.sub('', s.lower())
    kept = [word for word in sorted(cleaned.split(' ')) if word not in SUFFIXES]
    return dm(unicode("".join(kept)))[0]
Esempio n. 22
0
    def post(self):
        """Import Program or Person rows from an uploaded CSV file.

        The CSV text is read from the ``file`` request field.  Rows with an
        ``age_group`` column are stored as programs, geocoded from their
        address, city and state; rows with a ``main_focus`` column are
        stored as people.  An entity is skipped when one already exists
        with the same coordinates (programs) or the same name metaphone
        (people).  Region names not yet stored are saved as Region entities.

        When at least one row was processed, the list of rows that could
        not be imported is written to the response.  Otherwise the client
        is redirected to the import page with an error message.
        """
        csv_file = self.request.get('file')
        cr = csv.DictReader(csv_file.split('\n'))
        g = geocoders.GoogleV3()
        complete = False
        to_put = []
        regions_list = []
        # Collected across all rows, so the response reports every failure
        # and not only those of the last row.
        failures_array = []

        for row in cr:
            # The columns present decide which kind of entity the row holds.
            if "age_group" in row:
                kind = "Program"
            elif "main_focus" in row:
                kind = "People"
            else:
                kind = None

            if kind == "Program":
                p = program_db.Program()
                address_string = row["address"] + " " + row["city"] + " " + row["state"]
                if len(address_string) <= 10:
                    # Too short to be geocoded; a program cannot be stored
                    # without coordinates.
                    failures_array.append(address_string)
                    continue
                try:
                    place, (lat, lng) = g.geocode(address_string.lower())
                    logging.debug(lat)
                    logging.debug(lng)
                    logging.debug(place)

                    setattr(p, "latitude", float(lat))
                    setattr(p, "longitude", float(lng))
                    for key in row.keys():
                        setattr(p, key, unicode(str(row[key]), 'utf-8'))
                        if key == "region":
                            regions_list.append(row[key])

                    # Store only programs not already at these coordinates.
                    q = program_db.Program.all()
                    q.filter('latitude = ', float(lat))
                    q.filter('longitude = ', float(lng))
                    if not q.get():
                        to_put.append(p)
                    complete = True
                except Exception:
                    # Best effort: record the row and carry on with the next.
                    logging.exception("could not import program at %r", address_string)
                    failures_array.append(address_string)
                    continue

            elif kind == "People":
                p = person_db.Person()
                # Reset per row so a value from an earlier row is never
                # reused for the duplicate check.
                name_metaphone = None
                for key in row.keys():
                    setattr(p, key, row[key])
                    if key == "name":
                        name_metaphone = metaphone.dm(unicode(row[key]))
                        setattr(p, "name_metaphone", str(name_metaphone[0]))
                    if key == "program":
                        program_metaphone = metaphone.dm(unicode(row[key]))
                        setattr(p, "program_metaphone", str(program_metaphone[0]))
                    if key == "region":
                        regions_list.append(row[key])

                if name_metaphone is None:
                    # Without a name there is nothing to deduplicate on
                    # (this used to raise NameError).
                    failures_array.append("missing name: %r" % (row,))
                    continue

                q = person_db.Person.all()
                q.filter('name_metaphone = ', str(name_metaphone[0]))
                if not q.get():
                    to_put.append(p)
                complete = True

        if complete:
            db.put(list(set(to_put)))

            saved_regions = set(
                region.name for region in region_db.Region.all().fetch(1000))
            for region_name in set(regions_list) - saved_regions:
                region_db.Region(name=region_name).put()

            self.response.write(failures_array)
            return
        self.redirect("/import?message=Nothing Uploaded, the CSV was not valid")
        return
Esempio n. 23
0
# Upper-case country name -> three-letter country code (None where no code
# is available).  Built once at import time instead of on every call.
_COUNTRY_CODES = {
    u'AFGHANISTAN': u'AFG',
    u'ALBANIA': u'ALB',
    u'ALGERIA': u'DZA',
    u'AMERICAN SAMOA': u'ASM',
    u'ANDORRA': u'AND',
    u'ANGOLA': u'AGO',
    u'ANGUILLA': u'AIA',
    u'ANTARCTICA': None,
    u'ANTIGUA AND BARBUDA': u'ATG',
    u'ARGENTINA': u'ARG',
    u'ARMENIA': u'ARM',
    u'ARUBA': u'ABW',
    u'AUSTRALIA': u'AUS',
    u'AUSTRIA': u'AUT',
    u'AZERBAIJAN': u'AZE',
    u'BAHAMAS': u'BHS',
    u'BAHRAIN': u'BHR',
    u'BANGLADESH': u'BGD',
    u'BARBADOS': u'BRB',
    u'BELARUS': u'BLR',
    u'BELGIUM': u'BEL',
    u'BELIZE': u'BLZ',
    u'BENIN': u'BEN',
    u'BERMUDA': u'BMU',
    u'BHUTAN': u'BTN',
    u'BOLIVIA': u'BOL',
    u'BOSNIA AND HERZEGOVINA': u'BIH',
    u'BOTSWANA': u'BWA',
    u'BOUVET ISLAND': None,
    u'BRAZIL': u'BRA',
    u'BRITISH INDIAN OCEAN TERRITORY': None,
    u'BRUNEI DARUSSALAM': u'BRN',
    u'BULGARIA': u'BGR',
    u'BURKINA FASO': u'BFA',
    u'BURUNDI': u'BDI',
    u'CAMBODIA': u'KHM',
    u'CAMEROON': u'CMR',
    u'CANADA': u'CAN',
    u'CAPE VERDE': u'CPV',
    u'CAYMAN ISLANDS': u'CYM',
    u'CENTRAL AFRICAN REPUBLIC': u'CAF',
    u'CHAD': u'TCD',
    u'CHILE': u'CHL',
    u'CHINA': u'CHN',
    u'CHRISTMAS ISLAND': None,
    u'COCOS (KEELING) ISLANDS': None,
    u'COLOMBIA': u'COL',
    u'COMOROS': u'COM',
    u'CONGO': u'COG',
    u'CONGO, THE DEMOCRATIC REPUBLIC OF THE': u'COD',
    # was the corrupted value u'C*K'; the alpha-3 code is COK
    u'COOK ISLANDS': u'COK',
    u'COSTA RICA': u'CRI',
    u"COTE D'IVOIRE": u'CIV',
    u'CROATIA': u'HRV',
    u'CUBA': u'CUB',
    u'CYPRUS': u'CYP',
    u'CZECH REPUBLIC': u'CZE',
    u'DENMARK': u'DNK',
    u'DJIBOUTI': u'DJI',
    u'DOMINICA': u'DMA',
    u'DOMINICAN REPUBLIC': u'DOM',
    u'ECUADOR': u'ECU',
    u'EGYPT': u'EGY',
    u'EL SALVADOR': u'SLV',
    u'EQUATORIAL GUINEA': u'GNQ',
    u'ERITREA': u'ERI',
    u'ESTONIA': u'EST',
    u'ETHIOPIA': u'ETH',
    u'FALKLAND ISLANDS (MALVINAS)': u'FLK',
    u'FAROE ISLANDS': u'FRO',
    u'FIJI': u'FJI',
    u'FINLAND': u'FIN',
    u'FRANCE': u'FRA',
    u'FRENCH GUIANA': u'GUF',
    u'FRENCH POLYNESIA': u'PYF',
    u'FRENCH SOUTHERN TERRITORIES': None,
    u'GABON': u'GAB',
    u'GAMBIA': u'GMB',
    u'GEORGIA': u'GEO',
    u'GERMANY': u'DEU',
    u'GHANA': u'GHA',
    u'GIBRALTAR': u'GIB',
    u'GREECE': u'GRC',
    u'GREENLAND': u'GRL',
    u'GRENADA': u'GRD',
    u'GUADELOUPE': u'GLP',
    u'GUAM': u'GUM',
    u'GUATEMALA': u'GTM',
    u'GUINEA': u'GIN',
    u'GUINEA-BISSAU': u'GNB',
    u'GUYANA': u'GUY',
    u'HAITI': u'HTI',
    u'HEARD ISLAND AND MCDONALD ISLANDS': None,
    u'HOLY SEE (VATICAN CITY STATE)': u'VAT',
    u'HONDURAS': u'HND',
    u'HONG KONG': u'HKG',
    u'HUNGARY': u'HUN',
    u'ICELAND': u'ISL',
    u'INDIA': u'IND',
    u'INDONESIA': u'IDN',
    u'IRAN, ISLAMIC REPUBLIC OF': u'IRN',
    u'IRAQ': u'IRQ',
    u'IRELAND': u'IRL',
    u'ISRAEL': u'ISR',
    u'ITALY': u'ITA',
    u'JAMAICA': u'JAM',
    u'JAPAN': u'JPN',
    u'JORDAN': u'JOR',
    u'KAZAKHSTAN': u'KAZ',
    u'KENYA': u'KEN',
    u'KIRIBATI': u'KIR',
    u"KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF": u'PRK',
    u'KOREA, REPUBLIC OF': u'KOR',
    u'KUWAIT': u'KWT',
    u'KYRGYZSTAN': u'KGZ',
    u"LAO PEOPLE'S DEMOCRATIC REPUBLIC": u'LAO',
    u'LATVIA': u'LVA',
    u'LEBANON': u'LBN',
    u'LESOTHO': u'LSO',
    u'LIBERIA': u'LBR',
    u'LIBYAN ARAB JAMAHIRIYA': u'LBY',
    u'LIECHTENSTEIN': u'LIE',
    u'LITHUANIA': u'LTU',
    u'LUXEMBOURG': u'LUX',
    u'MACAO': u'MAC',
    u'MACEDONIA, THE FORMER YUGOSLAV REPUBLIC OF': u'MKD',
    u'MADAGASCAR': u'MDG',
    u'MALAWI': u'MWI',
    u'MALAYSIA': u'MYS',
    u'MALDIVES': u'MDV',
    u'MALI': u'MLI',
    u'MALTA': u'MLT',
    u'MARSHALL ISLANDS': u'MHL',
    u'MARTINIQUE': u'MTQ',
    u'MAURITANIA': u'MRT',
    u'MAURITIUS': u'MUS',
    u'MAYOTTE': None,
    u'MEXICO': u'MEX',
    u'MICRONESIA, FEDERATED STATES OF': u'FSM',
    u'MOLDOVA, REPUBLIC OF': u'MDA',
    u'MONACO': u'MCO',
    u'MONGOLIA': u'MNG',
    u'MONTSERRAT': u'MSR',
    u'MOROCCO': u'MAR',
    u'MOZAMBIQUE': u'MOZ',
    u'MYANMAR': u'MMR',
    u'NAMIBIA': u'NAM',
    u'NAURU': u'NRU',
    u'NEPAL': u'NPL',
    u'NETHERLANDS': u'NLD',
    u'NETHERLANDS ANTILLES': u'ANT',
    u'NEW CALEDONIA': u'NCL',
    u'NEW ZEALAND': u'NZL',
    u'NICARAGUA': u'NIC',
    u'NIGER': u'NER',
    u'NIGERIA': u'NGA',
    u'NIUE': u'NIU',
    u'NORFOLK ISLAND': u'NFK',
    u'NORTHERN MARIANA ISLANDS': u'MNP',
    u'NORWAY': u'NOR',
    u'OMAN': u'OMN',
    u'PAKISTAN': u'PAK',
    u'PALAU': u'PLW',
    u'PALESTINIAN TERRITORY, OCCUPIED': None,
    u'PANAMA': u'PAN',
    u'PAPUA NEW GUINEA': u'PNG',
    u'PARAGUAY': u'PRY',
    u'PERU': u'PER',
    u'PHILIPPINES': u'PHL',
    u'PITCAIRN': u'PCN',
    u'POLAND': u'POL',
    u'PORTUGAL': u'PRT',
    u'PUERTO RICO': u'PRI',
    u'QATAR': u'QAT',
    u'REUNION': u'REU',
    u'ROMANIA': u'ROM',
    u'RUSSIAN FEDERATION': u'RUS',
    u'RWANDA': u'RWA',
    u'SAINT HELENA': u'SHN',
    u'SAINT KITTS AND NEVIS': u'KNA',
    u'SAINT LUCIA': u'LCA',
    u'SAINT PIERRE AND MIQUELON': u'SPM',
    u'SAINT VINCENT AND THE GRENADINES': u'VCT',
    u'SAMOA': u'WSM',
    u'SAN MARINO': u'SMR',
    u'SAO TOME AND PRINCIPE': u'STP',
    u'SAUDI ARABIA': u'SAU',
    u'SENEGAL': u'SEN',
    u'SERBIA AND MONTENEGRO': None,
    u'SEYCHELLES': u'SYC',
    u'SIERRA LEONE': u'SLE',
    u'SINGAPORE': u'SGP',
    u'SLOVAKIA': u'SVK',
    u'SLOVENIA': u'SVN',
    u'SOLOMON ISLANDS': u'SLB',
    u'SOMALIA': u'SOM',
    u'SOUTH AFRICA': u'ZAF',
    u'SOUTH GEORGIA AND THE SOUTH SANDWICH ISLANDS': None,
    u'SPAIN': u'ESP',
    u'SRI LANKA': u'LKA',
    u'SUDAN': u'SDN',
    u'SURINAME': u'SUR',
    u'SVALBARD AND JAN MAYEN': u'SJM',
    u'SWAZILAND': u'SWZ',
    u'SWEDEN': u'SWE',
    u'SWITZERLAND': u'CHE',
    u'SYRIAN ARAB REPUBLIC': u'SYR',
    u'TAIWAN, PROVINCE OF CHINA': u'TWN',
    u'TAJIKISTAN': u'TJK',
    u'TANZANIA, UNITED REPUBLIC OF': u'TZA',
    u'THAILAND': u'THA',
    u'TIMOR-LESTE': None,
    u'TOGO': u'TGO',
    u'TOKELAU': u'TKL',
    u'TONGA': u'TON',
    u'TRINIDAD AND TOBAGO': u'TTO',
    u'TUNISIA': u'TUN',
    u'TURKEY': u'TUR',
    u'TURKMENISTAN': u'TKM',
    u'TURKS AND CAICOS ISLANDS': u'TCA',
    u'TUVALU': u'TUV',
    u'UGANDA': u'UGA',
    u'UKRAINE': u'UKR',
    u'UNITED ARAB EMIRATES': u'ARE',
    u'UNITED KINGDOM': u'GBR',
    u'UNITED STATES': u'USA',
    u'UNITED STATES MINOR OUTLYING ISLANDS': None,
    u'URUGUAY': u'URY',
    u'UZBEKISTAN': u'UZB',
    u'VANUATU': u'VUT',
    u'VENEZUELA': u'VEN',
    u'VIET NAM': u'VNM',
    u'VIRGIN ISLANDS, BRITISH': u'VGB',
    u'VIRGIN ISLANDS, U.S.': u'VIR',
    u'WALLIS AND FUTUNA': u'WLF',
    u'WESTERN SAHARA': u'ESH',
    u'YEMEN': u'YEM',
    u'ZAMBIA': u'ZMB',
    u'ZIMBABWE': u'ZWE',
}


def reconcile_country(raw_country, similarity_threshold=2):
    """Resolve a free-text country name to its three-letter code.

    Returns ``(True, code)`` when the upper-cased name is a known country
    (``code`` is None for countries without one).  Otherwise returns
    ``(False, {'double-metaphone': suggestions})`` where each suggestion is
    a ``(score, raw_country, country_name, code)`` tuple.  A country is
    suggested with score 1.0 when its primary metaphone equals the primary
    or secondary metaphone of *raw_country*.

    *similarity_threshold* is the minimum ``jarow`` score between primary
    metaphones for a country to be suggested as a near match.  The default
    of 2 preserves the previous behaviour, in which the threshold could
    never be reached.
    NOTE(review): jarow presumably returns a score in [0, 1], so pass e.g.
    0.8 to enable near matches - confirm the range.
    """
    # check if term is a key in the country table
    country_key = raw_country.upper()
    if country_key in _COUNTRY_CODES:
        return True, _COUNTRY_CODES[country_key]

    search_sound = dm(unicode(raw_country))
    suggestions = []
    for c in _COUNTRY_CODES:
        country_sound = dm(unicode(c))
        if search_sound[0] == country_sound[0]:
            suggestions.append((1.0, raw_country, c, _COUNTRY_CODES[c]))
            continue

        if search_sound[1] is not None and search_sound[1] == country_sound[0]:
            # secondary metaphone of the search term matches the primary
            # metaphone of the country
            suggestions.append((1.0, raw_country, c, _COUNTRY_CODES[c]))
            continue

        # no exact match, so see if the primary metaphones are similar
        primary_sound_dist = jarow(str(search_sound[0]), str(country_sound[0]))
        if primary_sound_dist >= similarity_threshold:
            # appended to `suggestions`; the undefined name `similar` used
            # here before would have raised NameError
            suggestions.append((primary_sound_dist, raw_country, c, _COUNTRY_CODES[c]))
    return False, {'double-metaphone': suggestions}