def DictionaryCreation(vocab_dict):
    """Write the distinct vocabulary entries for the segmented keys to disk.

    The keys of ``vocab_dict`` are sorted, each key (except the first one in
    sorted order) is split with ``segmentamt``, and every upper-cased segment
    is resolved through ``MaxiMumInVocab``.  The first element of each result
    is written to ``vocab_dict_file`` once, one entry per line, in the order
    it is first encountered.

    Args:
        vocab_dict: Mapping whose keys are segmented; it is also handed to
            ``MaxiMumInVocab`` for every segment lookup.

    Returns:
        None.  The only output is the file named by the module-level
        ``vocab_dict_file``, which is truncated and rewritten.
    """
    # Byte-compile the helper module before importing it (kept from the
    # original implementation).
    py_compile.compile('segmentmeta.py')
    from segmentmeta import segmentamt

    sorted_keys = sorted(vocab_dict)
    already_written = set()

    # ``with`` guarantees the handle is closed even if a lookup raises.
    with open(vocab_dict_file, 'w') as out:
        # NOTE(review): the original loop started at index 1, so the first
        # key in sorted order is never processed.  Kept as-is; confirm
        # whether that is intentional (e.g. to skip an empty-string key).
        for key in sorted_keys[1:]:
            for piece in segmentamt(key):
                best = MaxiMumInVocab(vocab_dict, piece.upper())
                entry = best[0]
                if entry not in already_written:
                    already_written.add(entry)
                    out.write(str(entry) + '\n')
def DictionaryCreation(vocab_dict):
    """Write the distinct vocabulary entries for the segmented keys to disk.

    The keys of ``vocab_dict`` are sorted, each key (except the first one in
    sorted order) is split with ``segmentamt``, and every upper-cased segment
    is resolved through ``MaxiMumInVocab``.  The first element of each result
    is written to ``vocab_dict_file`` once, one entry per line, in the order
    it is first encountered.

    Args:
        vocab_dict: Mapping whose keys are segmented; it is also handed to
            ``MaxiMumInVocab`` for every segment lookup.

    Returns:
        None.  The only output is the file named by the module-level
        ``vocab_dict_file``, which is truncated and rewritten.
    """
    # Byte-compile the helper module before importing it (kept from the
    # original implementation).
    py_compile.compile('segmentmeta.py')
    from segmentmeta import segmentamt

    sorted_keys = sorted(vocab_dict)
    already_written = set()

    # ``with`` guarantees the handle is closed even if a lookup raises.
    with open(vocab_dict_file, 'w') as out:
        # NOTE(review): the original loop started at index 1, so the first
        # key in sorted order is never processed.  Kept as-is; confirm
        # whether that is intentional (e.g. to skip an empty-string key).
        for key in sorted_keys[1:]:
            for piece in segmentamt(key):
                best = MaxiMumInVocab(vocab_dict, piece.upper())
                entry = best[0]
                if entry not in already_written:
                    already_written.add(entry)
                    out.write(str(entry) + '\n')
def PrepareFinalString(Address, cluster, wordlist, abb_list, vocab_dict):
    """Rebuild an address string from segmented, spell-checked tokens.

    Processing order:
      1. Lower-case the input and replace tokens found in ``abb_list``.
      2. Replace every run of non-ASCII-letter characters with one space and
         split on single spaces.
      3. Run ``segment`` on each token.  Tokens that split into several
         pieces are resolved piece by piece through their double-metaphone
         code and ``cluster``; single-piece tokens go through ``SpellCheck``
         first and fall back to the metaphone/``cluster`` lookup when the
         best match scores below 85.
      4. Join the resolved words with spaces, apply ``abb_list`` again, then
         apply the mapping returned by ``RemoveList(vocab_dict)``.

    Args:
        Address: Raw address text.
        cluster: Mapping keyed by upper-cased metaphone codes; passed to
            ``MaxiMumInVocab`` to pick the replacement word.
        wordlist: Candidate words handed to ``SpellCheck``.
        abb_list: Mapping of token -> replacement applied before and after
            segmentation.
        vocab_dict: Passed to ``RemoveList`` to build the final replacement
            mapping.

    Returns:
        The rebuilt address with leading/trailing whitespace stripped.
    """
    from segmentmeta import segmentamt
    from segment import segment

    def _substitute(text, mapping):
        # The capturing group makes re.split keep the separators, so the
        # join reproduces the text with only mapped tokens replaced.
        return ''.join(
            mapping.get(tok, tok) for tok in re.split(r'(\W+)', text))

    def _pick_code(codes):
        # Use the primary metaphone code unless it is a single character or
        # empty, in which case the secondary code is used.
        primary = codes[0]
        return primary if len(primary) > 1 else codes[1]

    print(Address)
    text = _substitute(Address.lower(), abb_list)
    text = re.sub('[^A-Za-z]+', ' ', text)  # keep alphabetic characters only

    resolved = []
    for token in text.split(' '):
        pieces = segment(token)
        # NOTE(review): the original printed the name ``temp`` here, which is
        # not bound anywhere in this function; printing the segmentation
        # result is presumably what was intended.
        print(pieces)

        if len(pieces) > 1:
            for piece in pieces:
                code = _pick_code(doublemetaphone(piece))
                # Guard against a missing secondary code instead of failing
                # on ``.upper()``; the raw piece is kept in that case.
                if code is not None and code.upper() in cluster:
                    resolved.append(
                        str(MaxiMumInVocab(cluster, code.upper())[0]))
                else:
                    resolved.append(str(piece))
        elif len(pieces) == 1:
            word = pieces[0]
            if len(word) == 1:
                # Single letters are passed through untouched.
                resolved.append(word)
                continue

            matches = SpellCheck(word, wordlist)
            score = float(matches[0][1]) * 100
            if score < 85:
                code = _pick_code(doublemetaphone(word))
                if not code:
                    # No usable metaphone code: keep the word and skip the
                    # cluster lookup (the original fell through and still
                    # segmented the empty code).
                    resolved.append(str(word))
                    continue
                for part in segmentamt(code):
                    if part.upper() in cluster:
                        resolved.append(
                            str(MaxiMumInVocab(cluster, part.upper())[0]))
                    else:
                        # The original appended str() of the whole list,
                        # which injected "['word']" into the address.
                        resolved.append(str(word))
            else:
                # The spell checker's best match is trusted as-is.
                resolved.append(str(matches[0][0]))

    text = _substitute(" ".join(resolved), abb_list)
    try:
        remove_list = RemoveList(vocab_dict)
        text = _substitute(text, remove_list)
    except Exception:
        # Best-effort fallback kept from the original: when the removal
        # mapping cannot be built or applied, only collapse repeated spaces.
        # NOTE(review): spaces are NOT collapsed on the success path --
        # confirm whether that is intended.
        text = re.sub(' +', ' ', text)
    return text.strip()
def PrepareFinalString(Address, cluster, wordlist, abb_list, vocab_dict):
    """Rebuild an address string from segmented, spell-checked tokens.

    Processing order:
      1. Lower-case the input and replace tokens found in ``abb_list``.
      2. Replace every run of non-ASCII-letter characters with one space and
         split on single spaces.
      3. Run ``segment`` on each token.  Tokens that split into several
         pieces are resolved piece by piece through their double-metaphone
         code and ``cluster``; single-piece tokens go through ``SpellCheck``
         first and fall back to the metaphone/``cluster`` lookup when the
         best match scores below 85.
      4. Join the resolved words with spaces, apply ``abb_list`` again, then
         apply the mapping returned by ``RemoveList(vocab_dict)``.

    Args:
        Address: Raw address text.
        cluster: Mapping keyed by upper-cased metaphone codes; passed to
            ``MaxiMumInVocab`` to pick the replacement word.
        wordlist: Candidate words handed to ``SpellCheck``.
        abb_list: Mapping of token -> replacement applied before and after
            segmentation.
        vocab_dict: Passed to ``RemoveList`` to build the final replacement
            mapping.

    Returns:
        The rebuilt address with leading/trailing whitespace stripped.
    """
    from segmentmeta import segmentamt
    from segment import segment

    def _substitute(text, mapping):
        # The capturing group makes re.split keep the separators, so the
        # join reproduces the text with only mapped tokens replaced.
        return ''.join(
            mapping.get(tok, tok) for tok in re.split(r'(\W+)', text))

    def _pick_code(codes):
        # Use the primary metaphone code unless it is a single character or
        # empty, in which case the secondary code is used.
        primary = codes[0]
        return primary if len(primary) > 1 else codes[1]

    print(Address)
    text = _substitute(Address.lower(), abb_list)
    text = re.sub('[^A-Za-z]+', ' ', text)  # keep alphabetic characters only

    resolved = []
    for token in text.split(' '):
        pieces = segment(token)
        # NOTE(review): the original printed the name ``temp`` here, which is
        # not bound anywhere in this function; printing the segmentation
        # result is presumably what was intended.
        print(pieces)

        if len(pieces) > 1:
            for piece in pieces:
                code = _pick_code(doublemetaphone(piece))
                # Guard against a missing secondary code instead of failing
                # on ``.upper()``; the raw piece is kept in that case.
                if code is not None and code.upper() in cluster:
                    resolved.append(
                        str(MaxiMumInVocab(cluster, code.upper())[0]))
                else:
                    resolved.append(str(piece))
        elif len(pieces) == 1:
            word = pieces[0]
            if len(word) == 1:
                # Single letters are passed through untouched.
                resolved.append(word)
                continue

            matches = SpellCheck(word, wordlist)
            score = float(matches[0][1]) * 100
            if score < 85:
                code = _pick_code(doublemetaphone(word))
                if not code:
                    # No usable metaphone code: keep the word and skip the
                    # cluster lookup (the original fell through and still
                    # segmented the empty code).
                    resolved.append(str(word))
                    continue
                for part in segmentamt(code):
                    if part.upper() in cluster:
                        resolved.append(
                            str(MaxiMumInVocab(cluster, part.upper())[0]))
                    else:
                        # The original appended str() of the whole list,
                        # which injected "['word']" into the address.
                        resolved.append(str(word))
            else:
                # The spell checker's best match is trusted as-is.
                resolved.append(str(matches[0][0]))

    text = _substitute(" ".join(resolved), abb_list)
    try:
        remove_list = RemoveList(vocab_dict)
        text = _substitute(text, remove_list)
    except Exception:
        # Best-effort fallback kept from the original: when the removal
        # mapping cannot be built or applied, only collapse repeated spaces.
        # NOTE(review): spaces are NOT collapsed on the success path --
        # confirm whether that is intended.
        text = re.sub(' +', ' ', text)
    return text.strip()