def transliterate(args):
    """Print every input word next to its transliteration.

    Reads ``args.lang`` (source language), ``args.target`` (target language)
    and ``args.input`` (an iterable of text lines).  Each line is split on
    whitespace; for every word a row of two left-aligned, 16-character
    columns (word, transliteration) is emitted through ``_print``, followed
    by an empty string after each input line.
    """
    converter = Transliterator(source_lang=args.lang, target_lang=args.target)
    for raw_line in args.input:
        rows = []
        for token in raw_line.strip().split():
            rows.append(u"{:<16}{:<16}".format(token, converter.transliterate(token)))
        _print(u"\n".join(rows))
        # Separator emitted after the rows of each input line.
        _print(u"")
def transliterate_to_hindi(sentence):
    """Transliterate an English sentence to Hindi, with a local fallback.

    ``transliterate_google`` is tried first and its result is returned
    unchanged.  If it raises, the sentence is split on single spaces and each
    token is transliterated with polyglot's en -> hi ``Transliterator``.

    Side effects: increments the module-level counter ``total_count`` on every
    call and ``package_count`` every time the polyglot fallback is used.

    Returns:
        Whatever ``transliterate_google`` returns on success (presumably a
        ``(text, source)`` tuple like the fallback -- TODO confirm), otherwise
        ``(transliterated_sentence, "polyglot")``.
    """
    global total_count, package_count
    total_count += 1
    try:
        return transliterate_google(sentence)
    except Exception:
        # Only ordinary errors trigger the fallback; KeyboardInterrupt and
        # SystemExit propagate instead of being silently swallowed.
        package_count += 1

    english_hindi_transliterator = Transliterator(source_lang="en",
                                                  target_lang="hi")
    transliterated_sent = " ".join(
        english_hindi_transliterator.transliterate(token)
        for token in sentence.split(' ')
    )
    return (transliterated_sent.strip(), "polyglot")
def translit_part(string, idxs, verbose=False):
    '''
    Transliterate `string` from Hindi to English one character at a time.

    Characters whose index is in `idxs` (the "haunted" indices) are looked up
    in the handmade `translation_dict`; every other ("good") character is
    passed to the polyglot transliterator.

    Because it works by CHARACTER, the result is not the highest-integrity
    transliteration of each name (e.g., it outputs ajy instead of ajay, the
    latter of which polyglot knows to output if you pass the whole word
    instead of a letter at a time).

    Parameters:
        string: the text to transliterate.
        idxs: container of character indices to translate via
            `translation_dict` instead of polyglot.
        verbose: when truthy, print a step-by-step trace.

    Returns:
        The concatenated transliteration of all characters.

    Raises:
        KeyError: if a haunted character is missing from `translation_dict`.
    '''
    trans = Transliterator(source_lang='hi', target_lang='en')
    if verbose:
        print("\n RUNNING CLEAN TRANSLIT FUNCTION ON: ", string)
        print("INITIALIZED EMPTY STRING TO BECOME FINAL CLEAN TRANSLITERATION")

    pieces = []
    for i, char in enumerate(string):
        if verbose:
            print("EXAMINING CHAR ", char, "AT INDEX ", i)
        if i in idxs:
            if verbose:
                print("INDEX IS HAUNTED")
            piece = translation_dict[char]
        else:
            if verbose:
                print("INDEX IS CLEAR")
            # Transliterate once and reuse the result for the trace below.
            piece = trans.transliterate(char)
        pieces.append(piece)
        if verbose:
            print("ADDING", piece, "TO CLEAN TRANSLIT")

    clean_translit = ''.join(pieces)
    if verbose:
        print()
        print("FINAL CLEAN TRANSLIT: ", clean_translit)
        print()
    return clean_translit
def _transliterate_hindi_runs(line, alpha, transliterator):
    # Replace every maximal run of characters found in `alpha` with its
    # transliteration; all other text is copied through unchanged.
    from itertools import groupby

    pieces = []
    for is_hindi, run in groupby(line, key=lambda ch: ch in alpha):
        chunk = "".join(run)
        pieces.append(transliterator.transliterate(chunk) if is_hindi else chunk)
    return "".join(pieces)


def transliterate_csv(path_in, path_out):
    """Copy a text/CSV file, transliterating Hindi runs to English.

    The first line (the header) is copied verbatim.  In every following line
    each maximal run of characters contained in ``getHindiAlphabet()`` is
    replaced by the polyglot hi -> en transliteration of that run, while the
    characters in between are preserved as-is.

    Parameters:
        path_in: path of the UTF-8 encoded input file.
        path_out: path of the output file; it is overwritten and written as
            UTF-8 so that any non-ASCII text carried over from the input
            round-trips regardless of the platform's default encoding.

    An empty input file produces an empty output file.
    """
    transliterator = Transliterator(source_lang="hi", target_lang="en")
    alpha = getHindiAlphabet()

    with open(path_in, "r", encoding='utf-8') as in_file:
        lines = in_file.read().splitlines()

    # lines[:1] keeps the header when present and is empty for an empty file.
    res = lines[:1]
    res.extend(
        _transliterate_hindi_runs(line, alpha, transliterator)
        for line in lines[1:]
    )

    with open(path_out, "w", encoding='utf-8') as out_file:
        out_file.writelines("{}\n".format(line) for line in res)
def transliterate(self, target_language="en"):
    """Return ``self.string`` transliterated from ``self.language``.

    ``target_language`` is the language code to transliterate into and
    defaults to ``"en"``.
    """
    converter = Transliterator(
        source_lang=self.language,
        target_lang=target_language,
    )
    result = converter.transliterate(self.string)
    return result
def transliterationLang(textString):
    """Transliterate ``textString`` from English to Hindi.

    The result is printed with a ``transliterationLang:`` prefix and then
    returned.
    """
    data = Transliterator(source_lang="en", target_lang="hi").transliterate(
        textString
    )
    print("transliterationLang: ", data)
    return data
def translate_language(source, destination, text):
    """Return ``text`` transliterated from English to Russian.

    NOTE(review): ``source`` and ``destination`` are accepted but never used;
    the language pair is fixed to "en" -> "ru".  Confirm with callers whether
    the arguments were meant to select the languages.
    """
    en_to_ru = Transliterator(source_lang="en", target_lang="ru")
    result = en_to_ru.transliterate(text)
    return result
from polyglot.transliteration import Transliterator

# Shared English -> Urdu transliterator, built once at import time.
trans = Transliterator(source_lang="en", target_lang="ur")


def transliterationAnalyzerUrduOne(txt):
    """Transliterate each whitespace-separated word of ``txt`` to Urdu.

    Returns a dict with the single key ``"result"`` holding the transliterated
    words joined by single spaces.
    """
    converted_words = [trans.transliterate(word) for word in txt.split()]
    return {"result": ' '.join(converted_words)}
# Print each neighbor of the current `word`, left-aligned in 16 columns.
for w in word.neighbors:
    print("{:<16}".format(w))

# Report the length of the word's vector and show its first ten entries.
dimension_count = word.vector.shape[0]
print("\n\nThe first 10 dimensions out the {} dimensions\n".format(dimension_count))
print(word.vector[:10])

# ## Morphology
sample_text = Text("Preprocessing is an essential step.")
word = sample_text.words[0]
print(word.morphemes)

# ## Transliteration
from polyglot.transliteration import Transliterator

transliterator = Transliterator(source_lang="en", target_lang="ru")
print(transliterator.transliterate(u"preprocessing"))

# # Introduction to Natural Language Processing with Polyglot [3]
# Dependencies: run the polyglot download commands in the original order.
for download_command in (
    'polyglot download embeddings2.en',
    'polyglot download ner2.en',
    'polyglot download sentiment2.en',
    'polyglot download pos2.en',
    'polyglot download morph2.en',
    'polyglot download transliteration2.ar',
    'polyglot download transliteration2.fr',
):
    get_ipython().system(download_command)
# -*- coding: utf-8 -*-
from polyglot.text import Text
from polyglot.transliteration import Transliterator

__author__ = 'gree-gorey'

# Use this approach when you want to transliterate a whole sentence:
# Text.transliterate yields one result per item, each printed on its own line.
blob = u'รัตนกศรีสยาม เรืองนามจากแผ่นดินถึงถิ่นสวรรค์'
text = Text(blob)
for x in text.transliterate(target_language="en"):
    print(x)

# And this is how to transliterate one word at a time.
# It does not transliterate anything longer than a single word.
transliterator = Transliterator(source_lang="th", target_lang="en")
# print() call form works on both Python 2 (single argument) and Python 3;
# the former `print expr` statement is a SyntaxError on Python 3.
print(transliterator.transliterate(u'เรืองนาม'))
from polyglot.transliteration import Transliterator

# Reddit API credentials and client settings; all left blank in this file.
reddit_app_key = ""
reddit_app_secret = ""
reddit_user_name = ""
reddit_user_password = ""
reddit_user_agent = ""
subreddit_name = "Judaism"

# Reddit client built from the settings above.
reddit = praw.Reddit(user_agent=reddit_user_agent,
                     client_id=reddit_app_key,
                     client_secret=reddit_app_secret,
                     username=reddit_user_name,
                     password=reddit_user_password)

# Shared helpers created once at import time: Hebrew -> English
# transliterator, geocoder and timezone lookup.
transliterator = Transliterator(source_lang="he", target_lang="en")
# NOTE(review): Nominatim() is created without arguments; newer geopy
# releases may require an explicit user_agent -- confirm against the
# installed version.
geolocator = Nominatim()
tf = TimezoneFinder()


def is_hebrew(term):
    """Return True if any character of `term` lies in U+0590..U+05EA."""
    return any("\u0590" <= c <= "\u05EA" for c in term)


def transliterate(phrase):
    """Transliterate `phrase` with the module-level he -> en transliterator."""
    return transliterator.transliterate(phrase)


def get_zmanim(address):
    # Geocode the address, then look up the timezone at its coordinates.
    # NOTE(review): presumably `c` can be None when the address is not
    # found, which would make the attribute access below fail -- confirm.
    c = geolocator.geocode(address)
    timezone = tf.timezone_at(lng=c.longitude, lat=c.latitude)