def update_entry(entry: dict) -> dict:
    """Split ``entry["PaxName"]`` into name parts and add derived fields.

    The name is split on whitespace and read positionally as last name,
    first name, second name; parts that are absent become empty strings.
    For each part the entry then receives:

    * ``..._sx`` -- ``soundex`` of the upper-cased, apostrophe-free output
      of ``transliterate``;
    * ``..._en`` -- the upper-cased, apostrophe-free output of
      ``translit(..., "ru", reversed=True)``.

    The dict is modified in place and also returned.
    """
    # Get name.
    tokens = entry["PaxName"].split()
    last, first, second = (tokens + ["", "", ""])[:3]
    entry["PassengerLastName"] = last
    entry["PassengerFirstName"] = first
    entry["PassengerSecondName"] = second

    # (source key, transliterated key, soundex key) for every name part.
    name_fields = (
        ("PassengerFirstName", "PassengerFirstName_en",
         "PassengerFirstName_sx"),
        ("PassengerSecondName", "PassengerSecondName_en",
         "PassengerSecondName_sx"),
        ("PassengerLastName", "PassengerLastName_en",
         "PassengerLastName_sx"),
    )

    # First pass: interim *_en values that feed the soundex codes below.
    for source_key, en_key, _ in name_fields:
        romanized = transliterate(entry[source_key])
        entry[en_key] = romanized.replace("'", "").upper()

    for _, en_key, sx_key in name_fields:
        entry[sx_key] = soundex(entry[en_key])

    # Transliterate name. This overwrites the interim *_en values; the
    # soundex codes computed above are left as they are.
    for source_key, en_key, _ in name_fields:
        romanized = translit(entry[source_key], "ru", reversed=True)
        entry[en_key] = romanized.replace("'", "").upper()

    return entry
def check_for_english_alternative(unwords, eng_words):
    """Return the words from *unwords* that resemble a word in *eng_words*.

    Each word is passed through ``transliteration.transliterate`` and the
    result is compared with every entry of *eng_words* using
    ``levenstein.levenshtein_ratio_and_distance(..., ratio_calc=True)``.
    A word is kept when at least one comparison yields a value above 0.9.

    Returns:
        A set holding the original (untransliterated) matching words.
    """
    matches = set()
    for candidate in unwords:
        romanized = transliteration.transliterate(candidate)
        for english_word in eng_words:
            similarity = levenstein.levenshtein_ratio_and_distance(
                romanized, english_word, ratio_calc=True)
            if similarity > 0.9:
                matches.add(candidate)
    return matches
def main(filter_region=None):
    """Convert the CSV at ``config.VLAN_PATH`` to JSON at ``config.VLAN_PATH_JSON``.

    The CSV is read with ``;`` as delimiter and quoting disabled. Rows are
    grouped into a dict as follows:

    * a row with a non-empty first column and an empty second column
      starts a region: ``transliterate(row[0])`` becomes a key mapped to
      an empty list;
    * a 16-column row whose last column transliterates to a known region
      key is appended to that region's list;
    * any other non-blank row is stored under ``'description'`` (the last
      such row wins).

    Blank rows are skipped. The resulting dict is serialised with
    ``json.dumps`` and written to ``config.VLAN_PATH_JSON``, replacing any
    previous content.

    Args:
        filter_region: Not used by this function; kept for interface
            compatibility.
    """
    csv.register_dialect('csv', delimiter=';', quoting=csv.QUOTE_NONE)
    regions = {}
    with open(config.VLAN_PATH, 'r', encoding='utf-8-sig') as source:
        for row in csv.reader(source, 'csv'):
            if not row:
                # csv.reader yields [] for blank lines; indexing it would
                # raise IndexError and it carries no data.
                continue
            # The length guard keeps single-column rows from raising
            # IndexError on row[1]; they fall through to 'description'.
            if len(row) >= 2 and row[0] != '' and row[1] == '':
                regions[transliterate(row[0])] = []
                continue
            if len(row) == 16:
                # Transliterate once and reuse for both lookup and append.
                region_key = transliterate(row[15])
                if region_key in regions:
                    regions[region_key].append(row)
                    continue
            regions['description'] = row

    # Serialise before touching the output file so a serialisation error
    # cannot leave a truncated file behind.
    payload = json.dumps(regions)
    # Mode 'w' truncates an existing file, which replaces the former
    # remove-then-append sequence; the context manager guarantees the
    # handle is closed even if the write fails.
    with open(config.VLAN_PATH_JSON, 'w', encoding='utf-8-sig') as target:
        target.write(payload)
def on_transliterate():
    """Transliterate the second field of ``current_row`` and show it.

    Sets the status variable to "Loading...", lazily creates the browser
    context when it is still ``None``, then calls ``transliterate`` with
    that context and ``current_row[1]``. On success the result replaces
    the content of ``trans_widget``, the widget is packed, the status
    becomes "To Roman Urdu" and ``hidden`` is set to ``False``.

    On any ordinary error the status becomes "Error Loading" and a fresh
    browser context is obtained for the next attempt.
    """
    # Only names that are rebound here need a ``global`` declaration;
    # trans_var, current_row and trans_widget are merely read.
    global browser_context
    global hidden

    trans_var.set("Loading...")
    if browser_context is None:
        browser_context = get_browser_context()
    try:
        transliteration = transliterate(browser_context, current_row[1])
        trans_var.set("To Roman Urdu")
        trans_widget.pack()
        trans_widget.delete(1.0, tk.END)
        trans_widget.insert(tk.END, "{}".format(transliteration))
        hidden = False
    except Exception:
        # Deliberately broad best-effort recovery, but not a bare
        # ``except:`` -- KeyboardInterrupt and SystemExit must propagate.
        trans_var.set("Error Loading")
        browser_context = get_browser_context()
def identify(word, lang='H'):
    """Tag *word* as slang ('S'), English ('E') or other ('O').

    The word is lower-cased, stripped of commas and periods and passed
    through ``handle_slang_words``. Three weights (slang, English, other)
    start at 1.0 and are multiplied by fixed factors depending on:

    * whether the word is a key of ``slang_dictionary()``;
    * whether any candidate from ``transliterate(word, lang)`` has synsets
      (``word_synset(candidate, lang=lang)``) and/or is reported by
      ``top_other_words(candidate, lang)``;
    * whether ``word_synset(word)`` is not ``None``;
    * whether ``top_english_words(word)`` is truthy.

    Returns:
        A tuple ``(word, tag, extras)``. For 'S', ``extras`` is a
        one-element list with the ``slang_dictionary()`` entry; for 'E' it
        is an empty list; for 'O' it lists the retained candidates,
        ordered by descending synset count.
    """
    word = word.lower().replace(',', '').replace('.', '')
    word = handle_slang_words(word)

    # Running weights, kept as [slang, english, other].
    weights = [1., 1., 1.]

    def reweight(slang_factor, english_factor, other_factor):
        weights[0] *= slang_factor
        weights[1] *= english_factor
        weights[2] *= other_factor

    if word in slang_dictionary():
        reweight(0.8, 0.1, 0.1)

    english_synset = word_synset(word)

    # Entries are (candidate, synsets or None, top-word flag or None).
    ranked = []
    for candidate in transliterate(word, lang):
        synsets = word_synset(candidate, lang=lang)
        is_top_word = top_other_words(candidate, lang)
        if synsets and len(synsets) > 0:
            ranked.append((candidate, synsets, bool(is_top_word)))
        elif is_top_word:
            ranked.append((candidate, None, None))

    def synset_count(item):
        return len(item[1]) if item[1] else 0

    if ranked:
        # Stable sort: candidates with the most synsets come first.
        ranked.sort(key=synset_count, reverse=True)
        reweight(0.30, 0.30, 0.40)
        if any(item[2] for item in ranked):
            reweight(0.25, 0.25, 0.5)
        elif any(item[1] is not None for item in ranked):
            reweight(0.30, 0.30, 0.40)

    if english_synset is not None:
        reweight(0.30, 0.40, 0.30)

    if top_english_words(word):
        reweight(0.15, 0.7, 0.15)

    slang_weight, english_weight, other_weight = weights
    if slang_weight > english_weight and slang_weight > other_weight:
        return word, 'S', [slang_dictionary()[word]]
    if english_weight > other_weight:
        return word, 'E', []
    return word, 'O', [item[0] for item in ranked]
def transliterate_massage(update, context):
    """Reply to the user's message with its transliteration.

    The text of ``update.effective_message`` is passed to ``transliterate``
    (Hebrew to Syriac or vice versa) and the result is sent back with
    ``reply_text``. ``context`` is accepted but not used.
    """
    message = update.effective_message
    converted = transliterate(message.text)
    message.reply_text(converted)
def testEnglishToHindi():
    """Transliterate ``en`` with the ``"hi_IN"`` target and print the result.

    Calls ``t.transliterate(en, "hi_IN")`` and prints the UTF-8 encoded
    result.
    """
    result = t.transliterate(en, "hi_IN")
    # Call form instead of the Python 2 print statement, which is a syntax
    # error on Python 3. With a single argument the output on Python 2 is
    # unchanged.
    # NOTE(review): on Python 3 this prints the bytes literal (b'...');
    # drop the .encode() if readable text is wanted there -- confirm the
    # type returned by t.transliterate first.
    print(result.encode('utf-8'))