Example #1
def findPhonetics(subject, bookshelf):
    """Find dictionary words whose metaphone code contains the subject's code,
    ranked by Levenshtein distance to the subject."""
    try:
        subPhonetic = metaphone(subject)
    except TypeError:
        print("Metaphone failed on the subject")  # the original said "Soundex broke"
        return

    similar = []

    with open(bookshelf) as dictionary:
        for line in dictionary:
            if "-" in line:
                continue
            line = line.rstrip("\n")
            try:
                linePhonetic = metaphone(line)
                if line != subject and linePhonetic.find(subPhonetic) != -1:
                    subjectIndex = findPhoneticIndex(subject, line)
                    similar.append(
                        (line,
                         levenshtein(
                             line[subjectIndex:subjectIndex + len(subject)],
                             subject)))
            except TypeError:
                # skip entries that metaphone cannot encode
                continue

    similar.sort(key=lambda tup: tup[1])
    return similar
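A minimal usage sketch, assuming a newline-delimited word list (the path here is hypothetical) and the project's own metaphone wrapper, findPhoneticIndex and levenshtein helpers in scope:

matches = findPhonetics("fonetics", "/usr/share/dict/words")
print(matches[:5])  # closest (word, distance) pairs come first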
Example #2
def phonetic_weight(name_to_remember, current_trigger_word, pw):
    # create metaphone codes without the first unnecessary letters
    # (the original tested `== ('w' or 'q')`, which only ever matched 'w')
    if name_to_remember[0] in ('w', 'q'):
        w1_without_fl = phonetics.metaphone(name_to_remember[2:])
    else:
        w1_without_fl = phonetics.metaphone(name_to_remember[1:])
    if current_trigger_word[0] in ('w', 'q'):
        w2_without_fl = phonetics.metaphone(current_trigger_word[2:])
    else:
        w2_without_fl = phonetics.metaphone(current_trigger_word[1:])
    # calculate the levenshtein distance between the two metaphone codes
    score_without_fl = enchant.utils.levenshtein(w1_without_fl, w2_without_fl)

    # create a list of all known rhyming words from the pronouncing library
    checklist = pronouncing.rhymes(name_to_remember)

    # check if the candidate trigger word is in the list of known rhyming words
    if current_trigger_word in checklist:
        return pw
    # otherwise calculate its phonetic similarity using the user's pw
    # (phonetic weight) and the levenshtein/metaphone score
    elif score_without_fl == 0:
        return pw / 1.5
    else:
        return pw / (score_without_fl + 1)

#the_list = create_output_list_v3(doc,start_word, pw, slw, flw, pwsw)
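A hedged call sketch, assuming the phonetics, pyenchant and pronouncing packages are installed:

import phonetics
import enchant.utils
import pronouncing

print(phonetic_weight("cat", "hat", pw=1.0))  # "hat" rhymes with "cat", so pw comes back unchanged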
                    
Example #3
def get_similar_metaphone(DF_NAMELIST):
    """
    match lastname -> match firstname -> match middlename's first letter
    input:
        pandas data frame of names (indexed)
    output:
        pandas data frame of matched names
    """
    df = DF_NAMELIST
    df['f_meta'] = [metaphone(s) for s in df['firstname']]
    df['l_meta'] = [metaphone(s) for s in df['lastname']]

    # sort_values returns a new frame; assign it (and renumber the rows) so the
    # positional neighbour checks below actually see phonetically sorted data
    df = df.sort_values(['f_meta', 'l_meta']).reset_index(drop=True)
    metamong = []
    for i in range(df.shape[0]):
        Collide1 = False
        if i != 0:
            Collide1 = True
            for attr in ['f_meta', 'l_meta']:
                if df.loc[i, attr] != df.loc[i - 1, attr]:
                    Collide1 = False

        Collide2 = False
        if i != df.shape[0] - 1:
            Collide2 = True
            for attr in ['f_meta', 'l_meta']:
                if df.loc[i, attr] != df.loc[i + 1, attr]:
                    Collide2 = False

        if Collide1 or Collide2:
            metamong.append(df.loc[i])
    metamong = pd.DataFrame(metamong)
    print("metaphone list created")
    return metamong
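A toy run, assuming pandas is imported as pd and metaphone comes from the phonetics package:

import pandas as pd
from phonetics import metaphone

names = pd.DataFrame({'firstname': ['John', 'Jon', 'Mary'],
                      'lastname': ['Smith', 'Smith', 'Jones']})
print(get_similar_metaphone(names))  # the two phonetically identical Smiths survive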
Example #4
def compare(a,b):
    a = a.lower() # convert all words to lower case for easier comparison. 
    b = b.lower()
    m = ph.metaphone(a)==ph.metaphone(b) # compare strings using metaphone
    s = ph.soundex(a)==ph.soundex(b) # compare strings using soundex
    n = ph.nysiis(a)==ph.nysiis(b) # compare strings using nysiis
    return m + s + n  # True/False results sum to an agreement score from 0 to 3
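A quick check, assuming the snippet's `ph` alias for the phonetics package:

import phonetics as ph

print(compare("Smith", "Smyth"))  # 3: metaphone, soundex and nysiis all agree
print(compare("Smith", "Jones"))  # 0: none of the encodings match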
Example #5
def findPhoneticIndex(subject, word):
    """Return the index in word where the subject's metaphone code begins,
    or None when no suffix of word starts with that code."""
    subjectPhone = metaphone(subject)

    for i in range(len(word)):
        wordPhone = metaphone(word[i:])
        if subjectPhone == wordPhone[:len(subjectPhone)]:
            return i
Example #6
def metaphone(collection):
    """
    Return the metaphone encoding of a string, or a list of encodings
    for a list of words.

    Arguments:
    collection -- the string, or list of words, to encode with metaphone.
    """
    try:
        assert isinstance(collection, (list, str))
    except AssertionError:
        # warn but fall through, matching the original behaviour
        print("The collection for metaphone is not a string or a list.")

    from phonetics import metaphone

    if isinstance(collection, str):
        return metaphone(collection)

    collectionEncoded = list()
    for word in collection:
        wordEncoded = metaphone(word)
        collectionEncoded.append(wordEncoded)

    return collectionEncoded
Example #7
def similarity(word1, word2):
    nysiis1 = phonetics.nysiis(word1)
    nysiis2 = phonetics.nysiis(word2)
    nysiis_distance = levenshtein(nysiis1, nysiis2)

    metaphone1 = phonetics.metaphone(word1)
    metaphone2 = phonetics.metaphone(word2)
    metaphone_distance = levenshtein(metaphone1, metaphone2)

    # dmetaphone returns a pair of codes; compare the primary codes
    dmetaphone1 = phonetics.dmetaphone(word1)[0]
    dmetaphone2 = phonetics.dmetaphone(word2)[0]
    dmetaphone_distance = levenshtein(dmetaphone1, dmetaphone2)

    # return a linear combination of these distances (lower = more similar)
    return nysiis_distance * 0.2 + metaphone_distance * 0.4 + dmetaphone_distance * 0.6
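A hedged harness, assuming the phonetics package and python-Levenshtein's distance function standing in for the levenshtein helper:

import phonetics
from Levenshtein import distance as levenshtein

print(similarity("catherine", "kathryn"))  # small values mean phonetically close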
Example #8
    def fuzzy_hash_string(self):
        """The fuzzy hash string, before hashing. Useful for computing string distances. """
        import unicodedata
        from phonetics import metaphone

        s = '|'.join([
            str(self.number),
            self.multinumber or '.',
            metaphone(self.street_name) if self.street_name else '.',
            metaphone(self.city) if self.city else '.',
            metaphone(self.state) if self.state else '.',
            str(self.zip4) if self.zip4 else '.'
        ]).lower()

        return unicodedata.normalize('NFC', s)
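A minimal harness; the holder below is hypothetical and only carries the fields the method reads, passed in place of self:

from types import SimpleNamespace

addr = SimpleNamespace(number=12, multinumber=None, street_name='Main',
                       city='Springfield', state='IL', zip4=None)
print(fuzzy_hash_string(addr))  # something like '12|.|mn|sprnkflt|il|.'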
Example #9
def phonetic_street_compare():
    st_name = request.args.get("st_name")
    st_name = re_pattern.sub("", st_name.lower())
    soundex = phonetics.soundex(st_name)
    print(soundex)

    results = set()
    soundexes = MongoAddress.objects(st_city="EL CAMPO", soundex=soundex)
    for address in soundexes:
        results.add(address["st_name"])  # a set already ignores duplicates

    metaphone = phonetics.metaphone(request.args.get("st_name").lower())
    metaphones = MongoAddress.objects(st_city="EL CAMPO", metaphone=metaphone)
    # note: the original guarded this loop with `if soundexes:`, a copy-paste slip
    for address in metaphones:
        results.add(address["st_name"])

    if results:
        return jsonify({"phonetics": list(results)})
    return jsonify("")
Example #10
def text2piano(text: str):
    """
    Converts a text string to piano sounds:
    text -> metaphone codes -> piano key sequence -> one concatenated mp3,
    built from per-key audio files found in the fingerprint.
    :param text:
    :return: mp3 file
    :TODO: at least one test. add rests.
    """
    _notes = [
        'A2', 'A3', 'A4', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'Ds1', 'Ds2',
        'Ds3', 'Ds4', 'Fs2', 'Fs3', 'Fs4'
    ]
    _metaphones = list('0BFHJKLMNPRSTWXY')
    _meta2piano = dict(zip(_metaphones, _notes))
    metaphones = list(phonetics.metaphone(text))
    # codes outside the 16-character map (e.g. a leading vowel) raise KeyError
    fingerprint = [_meta2piano[phone] for phone in metaphones]
    filenames = [f'{mypath}{f}_quart.mp3' for f in fingerprint]

    _mp3s = [AudioSegment.from_mp3(file) for file in filenames]
    _mp3 = sum(_mp3s)
    _outputmp3 = f"{mypath}{text}{''.join(metaphones)}.mp3".replace(' ', '')
    logging.warning(_outputmp3)
    _mp3.export(_outputmp3, format="mp3")
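The lookup chain alone, without the audio assets (assumes only the phonetics package):

import phonetics

_notes = ['A2', 'A3', 'A4', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'Ds1', 'Ds2',
          'Ds3', 'Ds4', 'Fs2', 'Fs3', 'Fs4']
_meta2piano = dict(zip(list('0BFHJKLMNPRSTWXY'), _notes))
codes = list(phonetics.metaphone('hello'))  # ['H', 'L']
print([_meta2piano[c] for c in codes])      # ['C1', 'C4']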
Example #11
    def similarity(self, other: str) -> float:
        res_seqmat = SequenceMatcher(None, self.name, other).ratio()
        res_lev = Levenshtein.distance(self.name, other)
        res_met = Levenshtein.distance(phonetics.metaphone(self.name),
                                       phonetics.metaphone(other))
        # dmetaphone yields a pair of codes per word; keep the closest pairing
        phon_this = phonetics.dmetaphone(self.name)
        phon_other = phonetics.dmetaphone(other)
        min_so_far = 9999999
        for i in phon_this:
            for j in phon_other:
                min_so_far = min(min_so_far, Levenshtein.distance(i, j))
        res_dmet = min_so_far
        # note: seqmat is a similarity ratio while the rest are distances
        weights = {"seqmat": 0.1, "lev": 0.5, "met": 0.2, "dmet": 0.3}
        return (res_seqmat * weights['seqmat'] + res_lev * weights['lev'] +
                res_met * weights['met'] +
                res_dmet * weights['dmet']) / 4.0
Example #12
		def searchString(qNom, qReg, mode = 0):
			qT = []
			#pdb.set_trace()

			if qNom is not None:
				regxN = re.compile(qNom, re.IGNORECASE)
				regxV = re.compile(qVille, re.IGNORECASE)
				q1 = {"$or": [ {"nom": {"$regex": regxN } } , {"ingr": {"$regex": regxV} } ]}
				if mode == 1:
					q1 = {"nomU": {"$regex": '.*' + scanName(qNom) + '.*'} }
				if mode == 2:
					phonetic=phonetics.metaphone(qNom)
					q1 = {"nomP": {"$regex": '.*' + phonetic + '.*'} } 
				qT.append(q1)
			
			if qReg is not None:
				#pdb.set_trace()
				q2 = {'cat._id': qReg}
				qT.append(q2)

			#pdb.set_trace()
			userRole = getUserRole()
			if not (localHost or userRole == 'ADM' or userRole == 'MEA'):
				qT.append({"state": 1 })

			return { "$and": qT }
Example #13
def create_metaphone_representation(lyrics):
    """ Returns the Metaphone representation of a string """

    metaphone_repr = ''
    for word in lyrics.split():
        try:
            metaphone_repr += phonetics.metaphone(word) + ' '
        except Exception:
            print(word)  # report words that metaphone cannot encode
    return metaphone_repr.rstrip()
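A hedged example, assuming the phonetics package:

import phonetics

print(create_metaphone_representation("twinkle twinkle little star"))
# one metaphone code per word, space-separated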
Example #14
def word_to_phonetic_distance(word1, word2):
    code_distance = dict()
    # weights
    weight = {"soundex": 0.3, "metaphone": 0.5, "nysiis": 0.2}
    edit_distance = 0
    try:
        code_distance['soundex'] = weight['soundex'] * damerau_levenshtein_distance(
            phonetics.soundex(word1), phonetics.soundex(word2))
        code_distance['metaphone'] = weight['metaphone'] * damerau_levenshtein_distance(
            phonetics.metaphone(word1), phonetics.metaphone(word2))
        code_distance['nysiis'] = weight['nysiis'] * damerau_levenshtein_distance(
            phonetics.nysiis(word1), phonetics.nysiis(word2))
        # Edit distance according to weight vector
        edit_distance = (code_distance['soundex'] + code_distance['metaphone'] +
                         code_distance['nysiis'])
    except Exception:
        pass

    return edit_distance
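A hedged call, assuming phonetics plus pyxDamerauLevenshtein's damerau_levenshtein_distance (jellyfish exposes a function of the same name):

import phonetics
from pyxdameraulevenshtein import damerau_levenshtein_distance

print(word_to_phonetic_distance("color", "colour"))  # 0.0: all three codes match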
Example #15
def _misspelt(spell, spells):
    """
    Returns (spell, SPELL_OK) if the spell is correct, (closest, SPELL_KO) if
    it looks like a misspelling of an existing spell, and
    (spell, SPELL_MISSING) otherwise.
    :param spell:
    :param spells:
    :return:
    """
    from phonetics import metaphone
    from editdistance import eval as edit_distance

    assert spell
    # log.debug("Looking for %r in %r", spell, spells)
    if spell in spells:
        return (spell, SPELL_OK)

    phonetic_spell = metaphone(spell)[:5]
    for existing_spell in spells:
        if edit_distance(metaphone(existing_spell)[:5], phonetic_spell) <= 2:
            log.warning("Misspelt incantation: %r instead of %r", spell,
                        existing_spell)
            return (existing_spell, SPELL_KO)

    return (spell, SPELL_MISSING)
Example #16
    def __init__(self, dictionary):
        GenericAlgorithm.__init__(self, dictionary)
        self.phoneticDictionary = {}

        # bucket every dictionary word under its metaphone code
        for word in self.dictionary:
            phono = phonetics.metaphone(word)
            self.phoneticDictionary.setdefault(phono, {})[word] = 1

        self.name = "metaphone"
        tFinish = int(time.time() * 1000)
        self.stats['runtime'] += tFinish - self.timeStart
Example #17
    def max_sound_like(self, word):
        """
        Find all the words that are likely to be mistakes from a user not
        knowing how to spell a word but knowing how it sounds.
        """
        meta = metaphone(word)
        rewords = []

        if meta in self.learned:
            words = sorted(self.learned[meta].items(),
                           key=operator.itemgetter(1), reverse=True)

            if word not in [i[0] for i in words]:
                if len(words) == 1:
                    rewords.append(words[0][0])
                else:
                    rewords += [i[0] for i in words]

        if len(rewords) > 0:
            return rewords
        else:
            return False
Example #19
    def findCorrections(self, typo):
        GenericAlgorithm.findCorrections(self, typo)

        self.timeStart = int(time.time() * 1000)
        try:
            phono = phonetics.metaphone(typo)
            if self.phoneticDictionary[phono]:
                # every word sharing the typo's metaphone code is a candidate
                for word in self.phoneticDictionary[phono].keys():
                    self.possibleSpellings[typo][word] = 1
        except KeyError:
            pass
        tFinish = int(time.time() * 1000)
        self.stats['runtime'] += tFinish - self.timeStart
Example #20
    def read(self):
        try:
            with open(self._connection_file, 'r') as connection_file:
                qByUser = connection_file.read()
                self._User_Data = json.loads(qByUser)
        except ValueError:
            # the original split a Python 2 print statement across two lines
            print('Decoding JSON has failed')

        for index, val in enumerate(self._User_Data):
            self._Metaphone_User_Data.append(metaphone(val))

        To_Add = dict(zip(self._User_Data, self._Metaphone_User_Data))

        for key, val in To_Add.items():
            if key in self._User_Data:
                print(val)
                self._User_Data[key] = [self._User_Data[key], val]

        with open(self._Output_File_Name, 'w') as outfile:
            json.dump(self._User_Data, outfile, indent=4)
Example #21
		def searchString(qNom, qReg, dist, lng, lat, mode = 0):
			qT = []
			#pdb.set_trace()
			if qNom is not None:
				regxN = re.compile(qNom, re.IGNORECASE)
				regxV = re.compile(qVille, re.IGNORECASE)
				q1 = {"$or": [ {"nom": {"$regex": regxN } } , {"municipal": {"$regex": regxV} } ]}
				if mode == 1:
					q1 = {"nomU": {"$regex": '.*' + scanName(qNom) + '.*'} }
				if mode == 2:
					phonetic=phonetics.metaphone(qNom)
					q1 = {"nomP": {"$regex": '.*' + phonetic + '.*'} } 
				qT.append(q1)
			
			if qReg is not None:
				q2 = {'region': qReg}
				qT.append(q2)

			if dist is not None:
				q3 = {"location": { "$near" : {"$geometry": { "type": "Point",  "coordinates": [ lng , lat ] }, "$maxDistance": dist }}}
				qT.append(q3)
			return { "$and": qT }
Example #22
    def Load_learned(self):
        """
        Load the metaphone dictionary, creating and pickling it on first use.
        """
        if len(self.learned) == 0:
            if os.path.exists(self.guten_pickle):
                with open(self.guten_pickle, 'rb') as infile:
                    self.learned = pickle.load(infile)
            else:
                if len(self.gutenburg) == 0:
                    self.Load_dict()

                for word, times in self.gutenburg.items():
                    meta = metaphone(word.replace("'", ""))

                    # add up the frequency of the words per metaphone code
                    if meta not in self.learned:
                        self.learned[meta] = {word: int(times)}
                    elif word not in self.learned[meta]:
                        self.learned[meta][word] = int(times)

                if len(self.learned) > 0:
                    with open(self.guten_pickle, 'wb') as outfile:
                        pickle.dump(self.learned, outfile)
Example #24
    async def on_message(self, message):
        if not isinstance(message.channel, discord.DMChannel):
            return
        if message.author.id == self.user.id:
            return

        words = message.content.lower()
        words = re.sub(r'([^\s\w]|_)+', '', words)
        words = words.split()
        metaphones = []
        for word in words:
            metaphones.append(phonetics.metaphone(word))

        desired_platform_name = ''
        # metaphone codes for common ways users name each platform
        pc_keywords = ['AN', 'ANTS', 'PK', 'STM']
        xbox_keywords = ['SPKS', 'SP']
        playstation_keywords = ['S', 'PLSTXN']

        for left_word in metaphones:
            for right_word in pc_keywords:
                if self.doWordsMatch(left_word, right_word):
                    desired_platform_name = 'PC'
                    break

            if desired_platform_name != '':
                break

            for right_word in xbox_keywords:
                if self.doWordsMatch(left_word, right_word):
                    desired_platform_name = 'Xbox'
                    break

            if desired_platform_name != '':
                break

            for right_word in playstation_keywords:
                if self.doWordsMatch(left_word, right_word):
                    desired_platform_name = 'PlayStation'
                    break

            if desired_platform_name != '':
                break

        if desired_platform_name == '':
            await message.channel.send(
                'Sorry, I didn\'t understand which platform you meant. Try saying `PC`, `Xbox`, or `PlayStation`.'
            )
            return

        for guild in client.guilds:
            remove_roles = []
            add_roles = []
            for role in guild.roles:
                if role.name == desired_platform_name:
                    add_roles.append(role)
                elif role.name == 'Xbox' or role.name == 'PC' or role.name == 'PlayStation':
                    remove_roles.append(role)

            member = guild.get_member(message.author.id)
            if member is None:
                continue

            for role in remove_roles:
                await member.remove_roles(role)
            for role in add_roles:
                await member.add_roles(role)

        await message.channel.send(
            'Ok, I\'ve set your platform to {0}. You can now access the Beacon Discord server\'s general channel.'
            .format(desired_platform_name))
Example #25
def phonetic_similarity(w1, w2):
    w1 = w1.replace("'",'')
    w2 = w2.replace("'", '')
    # print(w1, w2)
    # weighted sum of code edit distances; lower means more similar
    return 0.7 * edit_distance(ph.metaphone(w1), ph.metaphone(w2)) + \
           0.3 * edit_distance(ph.soundex(w1), ph.soundex(w2))
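A hedged check, assuming `import phonetics as ph` and editdistance's eval bound to edit_distance:

import phonetics as ph
from editdistance import eval as edit_distance

print(phonetic_similarity("smith", "smyth"))  # 0.0: identical metaphone and soundex codes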
Example #26
def saveRecet(param, self):
	""" Save Recette data """

	try:
		if param.get("data"):
			
			#pdb.set_trace()
			obj = loads(param['data'][0])
			imgURL = ""
			if param.get("imgURL"):
				imgURL = param['imgURL'][0]
				#print(str(imgURL))
			""" Save Recette data """
			cookie = cherrypy.request.cookie

			if localHost or checkSession(self, role = ['ADM','MEA']):
			#if True:

				coll = dataBase.recettes
				
				oIngr = obj["ingr"]
				oPrep = obj["prep"]
				
				nomU = scanName(obj["nom"])
				nomP = phonetics.metaphone(obj["nom"])
				Cuser = getCurrentUser()
				state = 1 if obj["state"] else 0
				actTime = int(time.time() * 1000)
				
				editData = ""
				if param.get("editor"):
					editData = param['editor'][0]

				if obj["ID"] == "NEW":	# Nouvelle recette
					oID = ObjectId()
					#doc = coll.insert({ '$set': {'nom': obj["nom"], 'nomU': nomU, 'nomP': nomP, "dateC": int(time.time() * 1000), "dateM": int(time.time() * 1000), 'temp': obj["temp"], 'port': obj["port"], 'cuis': obj["cuis"], 'cat': obj["cat"], 'url': obj["url"], "state": 1, 'ingr': oIngr, 'prep': oPrep } },  {"new":True} )
					doc = coll.update({ '_id': oID}, { '$set': {'nom': obj["nom"], 'nomU': nomU, 'nomP': nomP, "dateC": actTime, "dateM": actTime, 'userID': Cuser, 'temp': obj["temp"], 'port': obj["port"], 'cuis': obj["cuis"], 'cat': obj["cat"], 'url': obj["url"], "state": state, 'imgURL': imgURL, 'ingr': oIngr, 'prep': oPrep, 'edit': editData } },  upsert=True )
				else:
					oldDoc = loads(getRecette({'data':[ obj["ID"] ]}, self))['rec'][0]
					oID = ObjectId(obj["ID"])
					doc = coll.update({ '_id': oID}, { '$set': {'nom': obj["nom"], 'nomU': nomU, 'nomP': nomP, "dateM": actTime, 'userID': Cuser, 'temp': obj["temp"], 'port': obj["port"], 'cuis': obj["cuis"], 'cat': obj["cat"], 'url': obj["url"], "state": state, 'imgURL': imgURL, 'ingr': oIngr, 'prep': oPrep, 'edit': editData } },  upsert=True )
					#doc = coll.update_one({ '_id': oID}, { '$set': {'nom': obj["nom"], 'nomU': nomU, 'nomP': nomP, 'temp': obj["temp"], 'port': obj["port"], 'cuis': obj["cuis"], 'cat': obj["cat"], 'url': obj["url"], 'ingr': oIngr, 'prep': oPrep } },  upsert=True )
					newDoc = loads(getRecette({'data':[ obj["ID"] ]}, self))['rec'][0]
					obj={'time': actTime, 'userID': oldDoc['userID']}
					
					for key in newDoc:
						if key != "_id" and key != "hist" and key != "dateM" and key != "nomU" and key != "nomP":
							#print(key + " = " + str(docs[0][key]))
							if newDoc[key] != oldDoc[key]:
								obj[key] = oldDoc[key]
					if len(obj) > 2 and (actTime - oldDoc['dateM'] > 86400000 or oldDoc['userID'] != Cuser): # If modified add history
						#pdb.set_trace()
						doc = coll.update( { '_id': oID}, {'$push': {'hist': obj  }},  upsert=True )
					

				
				return dumps(doc)
			else: 
				return ('{"n":0,"ok":0, "message": "S0062"}')	# Check Session error
		else:
			return dumps({'ok': 0})	# No param
	except Exception as ex:
		return except_handler("saveRecet", ex)
Example #27
def saveClub(param, self):
	""" Save Club, courses and blocs data """

	try:
		if param.get("data"):
			
			#pdb.set_trace()
			obj = loads(param['data'][0])
			def saveBlocs(tupC, Bids):
				""" Save blocs data for the courses """
				blocRes = []
				coll = dataBase.blocs
				def getBlocID():
					docID = coll.find({}).sort("_id",-1).limit(1)
					return int(docID[0]["_id"] + 1)
				
				for bloc in oBlocs:
					res=dict()
					
					if len(str(bloc["_id"])) < 9 and int(bloc["_id"]) > 1000000:	# Not ObjectID and new attributed bloc ID 
						res["oldID"] = bloc["_id"]
						bloc["_id"] =  ObjectId()  #getBlocID()
						res["newID"] = str(bloc["_id"])
						for y in tupC:
							if bloc["PARCOURS_ID"] in y:
								bloc["PARCOURS_ID"] = y[1]	# Replace PARCOURS_ID by res["newID"] attributed
					else:
						bloc["_id"] = getID(str(bloc["_id"]))
						if bloc["_id"] in Bids:
							Bids.remove(bloc["_id"])
					#print("save id " + str(bloc["_id"]) + "  PARCOURS_ID " + str(bloc["PARCOURS_ID"]))
					doc = coll.update({ '_id': bloc["_id"]}, { '$set': {'PARCOURS_ID': bloc["PARCOURS_ID"], 'Bloc': bloc["Bloc"], 'T1': bloc["T1"], 'T2': bloc["T2"], 'T3': bloc["T3"], 'T4': bloc["T4"], 'T5': bloc["T5"], 'T6': bloc["T6"], 'T7': bloc["T7"], 'T8': bloc["T8"], 'T9': bloc["T9"], 'T10': bloc["T10"], 'T11': bloc["T11"], 'T12': bloc["T12"], 'T13': bloc["T13"], 'T14': bloc["T14"], 'T15': bloc["T15"], 'T16': bloc["T16"], 'T17': bloc["T17"], 'T18': bloc["T18"], 'Aller': bloc["Aller"], 'Retour': bloc["Retour"], 'Total': bloc["Total"], 'Eval': bloc["Eval"], 'Slope': bloc["Slope"] } },  upsert=True )

					res["result"]=doc
					res["result"]["_id"] = bloc["_id"]
					blocRes.append(res)

				docs = coll.remove({"_id": {"$in": Bids } })
				return blocRes, Bids
				
			def saveCourses(clubID, tupC, Pids):
				""" Save courses data for the Club """
				courseRes = []
				coll = dataBase.parcours
				def getCourseID():
					docID = coll.find({}).sort("_id",-1).limit(1)
					return int(docID[0]["_id"] + 1)
				def removeCourse(Pids):
					collB = dataBase.blocs
					docs = coll.remove({"_id": {"$in": Pids } })			# Remove Courses
					docs = collB.remove({"PARCOURS_ID": {"$in": Pids } })	# Remove Bloc Courses
					collG = dataBase.golfGPS
					docs = collG.remove({"Parcours_id": {"$in": Pids } })	# Remove GPS Courses   TO BE TESTED
					return

				for parc in oCourses:
					res=dict()
					if parc["_id"] > 1000000:
						res["oldID"] = parc["_id"]
						parc["_id"] = getCourseID()
						res["newID"] = parc["_id"]
						tupC = tupC,(res["oldID"],res["newID"])
					#removeBloc(parc["_id"])
					#print("save courses " + str(parc["_id"]))
					doc = coll.update({ '_id': parc["_id"]}, { '$set': {'CLUB_ID': parc["CLUB_ID"], 'POINTS': parc["POINTS"], 'PARCOURS': parc["PARCOURS"], 'DEPUIS': parc["DEPUIS"], 'TROUS': parc["TROUS"], 'NORMALE': parc["NORMALE"], 'VERGES': parc["VERGES"], 'GPS': parc["GPS"] } },  upsert=True )
					res["result"]=doc
					res["result"]["_id"] = parc["_id"]
					courseRes.append(res)
					if parc["_id"] in Pids:
						Pids.remove(parc["_id"])
				
				if len(Pids) > 0:
					removeCourse(Pids)
				return courseRes, tupC, Pids
				#[{'_id': '39', 'CLUB_ID': 47, 'POINTS': '24', 'PARCOURS': '', 'DEPUIS': '1990', 'TROUS': '18', 'NORMALE': '72', 'VERGES': '6322', 'GPS': True}, {'_id': 61, 'CLUB_ID': 47, 'POINTS': 'E', 'PARCOURS': '', 'DEPUIS': 0, 'TROUS': 9, 'NORMALE': 27, 'VERGES': 815, 'GPS': False}]
			
			""" Save Club data """

			if localHost or checkSession(self, role = ['ADM','MEA']):
			#if True:
				coll = dataBase.club
				def getClubID():
					docID = coll.find({}).sort("_id",-1).limit(1)
					return int(docID[0]["_id"] + 1)

				tupC = (0,0),(0,0)	# For new PARCOURS_ID in blocs
				oClub = obj["club"]
				oCourses = obj["course"]
				if 'blocs' in obj:
					oBlocs = obj["blocs"]
				#Postal code
				cp = oClub["codp"]
				cp = cp.upper()
				cp = re.sub(r" ", "", cp)
				cps = cp
				matchObj = re.match("^(?!.*[DFIOQU])[A-VXY][0-9][A-Z] ?[0-9][A-Z][0-9]$", cp)	# Canadian postal code, optional space
				if (matchObj):
					cps = cp[0:3] + " " + cp[3:6]

				clubID = oClub["ID"]
				if clubID > 1000000:	# New club
					clubID = getClubID()
				
				nomU = scanName(oClub["name"])
				nomP = phonetics.metaphone(oClub["name"])
				doc = coll.update({ '_id': clubID}, { '$set': {'nom': oClub["name"], 'nomU': nomU, 'nomP': nomP, 'prive': oClub["prive"], 'adresse': oClub["addr"], 'municipal': oClub["ville"], 'codepostal': cp, 'codepostal2': cps, 'url_club': oClub["urlc"], 'url_ville': oClub["urlv"], 'telephone': oClub["tel1"], 'telephone2': oClub["tel2"], 'telephone3': oClub["tel3"], 'email': oClub["email"], 'region': oClub["region"], 'latitude': oClub["lat"], 'longitude': oClub["lng"] } },  upsert=True )
				
				Pids = getCourseColl(clubID)
				Bids = getBlocColl(Pids)
				courseRes, tupC, cRem = saveCourses(clubID, tupC, Pids)
				if 'oBlocs' in locals():
					blocRes, bRem = saveBlocs(tupC, Bids)
				else:
					blocRes = []
					bRem = []
				upd = coll.update({'_id':clubID}, {'$set':{"courses": oCourses, "location": {'type': "Point", 'coordinates': [ oClub["lng"], oClub["lat"] ]} }})
				doc["courses"] = courseRes
				doc["blocs"] = blocRes
				doc["removedC"] = cRem
				doc["removedB"] = bRem
				return dumps(doc)
			else: 
				return ('{"n":0,"ok":0, "message": "S0062"}')	# Check Session error
		else:
			return dumps({'ok': 0})	# No param
	except Exception as ex:
		return except_handler("saveClub", ex)
Example #28
def metaphone_fuzzy_match(x,
                          against,
                          strategy=MatchStrategy.PARTIAL_TOKEN_SORT_RATIO):
    return fuzzy_match(phonetics.metaphone(x), phonetics.metaphone(against),
                       strategy)
Example #29
def getPhoneticsRatioMorada(str1, str2):
	return fuzz.ratio(phonetics.metaphone(str1),phonetics.metaphone(str2))
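A hedged check, assuming fuzzywuzzy's fuzz and the phonetics package ("Morada" is Portuguese for address):

import phonetics
from fuzzywuzzy import fuzz

print(getPhoneticsRatioMorada("Avenida", "Avenyda"))  # 100: identical metaphone codes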
Example #30
    def phonetic_match(self, text_a, text_b):
        return phonetics.metaphone(text_a) == phonetics.metaphone(text_b)
Example #31
import re
import phonetics as ph
from unidecode import unidecode

def unique_list(l):
    # upper-cased words, first occurrence kept, duplicates dropped
    ulist = []
    for x in re.findall(r'\w+', l):
        if x.upper() not in ulist:
            ulist.append(x.upper())
    return ulist

k = 0
nodes = dict()

file = open('paths.txt', 'r')
data = file.readlines()
for line in data:
	chars = [char for char in line.split(' | ') if len(char)]
	for i in range(0,len(chars)):
		nodes[chars[i].strip()]=[k+i]
		nodes[chars[i].strip()].append(ph.metaphone(str(unique_list(unidecode((chars[i]).strip())))))
	k = k + len(chars)
	print(k)
file.close()

for f in range(1,16):
	file = open('train/train'+ str(f)+'.txt', 'r')
	data = file.readlines()
		
	for line in data:
		chars = [char for char in line.split(' | ') if len(char)]
		nodes[chars[0].strip()]=[k]
		nodes[chars[0].strip()].append(ph.metaphone(str(unique_list(unidecode((chars[0]).strip())))))
		nodes[chars[1].strip()]=[k+1]
		nodes[chars[1].strip()].append(ph.metaphone(str(unique_list(unidecode((chars[1]).strip())))))
		k=k+2