Python Transliterator Examples, polyglot.transliteration.Transliterator Python Examples

Example #1

0

Show file

def transliterate(args):
  """Print every input word beside its transliteration.

  Each line read from ``args.input`` is split on whitespace. Every word is
  emitted on its own output line as two left-aligned, 16-character-wide
  columns (original word, transliterated word), and an empty line is
  printed after each input line. Languages come from ``args.lang`` and
  ``args.target``.
  """
  engine = Transliterator(source_lang=args.lang, target_lang=args.target)
  row_template = u"{:<16}{:<16}"
  for raw_line in args.input:
    rows = []
    for token in raw_line.strip().split():
      rows.append(row_template.format(token, engine.transliterate(token)))
    _print(u"\n".join(rows))
    # Blank separator between the word tables of consecutive input lines.
    _print(u"")

Example #2

0

Show file

def transliterate_to_hindi(sentence):
    """Transliterate ``sentence`` from English script into Hindi.

    ``transliterate_google`` is tried first and its return value is passed
    through unchanged. If that call raises, the sentence is split on single
    spaces and each token is transliterated with polyglot instead.

    Side effects:
        Increments the module-level ``total_count`` on every call and
        ``package_count`` every time the polyglot fallback is used.

    Returns:
        Whatever ``transliterate_google`` returns on success; on fallback, a
        tuple ``(transliterated_text, "polyglot")``.
    """
    global total_count, package_count
    total_count += 1
    try:
        return transliterate_google(sentence)
    except Exception:
        # ``Exception`` rather than a bare ``except:`` so that
        # KeyboardInterrupt / SystemExit still propagate instead of silently
        # triggering the fallback.
        package_count += 1
        english_hindi_transliterator = Transliterator(source_lang="en",
                                                      target_lang="hi")
        transliterated_sent = " ".join(
            english_hindi_transliterator.transliterate(token)
            for token in sentence.split(' ')
        )
        return (transliterated_sent.strip(), "polyglot")

Example #3

0

Show file

def translit_part(string, idxs, verbose=False):
    """Transliterate ``string`` from Hindi to English one character at a time.

    Characters whose position is listed in ``idxs`` (the "haunted" indices)
    are looked up in the module-level ``translation_dict``; every other
    ("good") character is passed to the polyglot transliterator.

    Because the transliterator only ever sees a single character, the result
    is not the highest-quality transliteration of a name (e.g. it outputs
    "ajy" instead of "ajay", which polyglot produces when given the whole
    word at once).

    Args:
        string: Text to transliterate.
        idxs: Container of indices into ``string`` that must be translated
            through ``translation_dict`` instead of polyglot.
        verbose: When truthy, print a trace of every step.

    Returns:
        The concatenation of the per-character results.

    Raises:
        KeyError: If a haunted character is missing from ``translation_dict``.
    """
    trans = Transliterator(source_lang='hi', target_lang='en')

    if verbose:
        print("\n RUNNING CLEAN TRANSLIT FUNCTION ON: ", string)
        print("INITIALIZED EMPTY STRING TO BECOME FINAL CLEAN TRANSLITERATION")

    pieces = []
    for i, char in enumerate(string):
        if verbose:
            print("EXAMINING CHAR ", char, "AT INDEX ", i)
        if i in idxs:
            if verbose:
                print("INDEX IS HAUNTED")
            piece = translation_dict[char]
        else:
            if verbose:
                print("INDEX IS CLEAR")
            # Transliterate once and reuse the result for the trace below,
            # rather than calling the transliterator a second time.
            piece = trans.transliterate(char)
        pieces.append(piece)
        if verbose:
            print("ADDING", piece, "TO CLEAN TRANSLIT")

    clean_translit = ''.join(pieces)

    if verbose:
        print()
        print("FINAL CLEAN TRANSLIT: ", clean_translit)
        print()

    return clean_translit

Example #4

0

Show file

def _transliterate_hindi_runs(line, alpha, transliterator):
    """Return ``line`` with each maximal run of ``alpha`` characters transliterated.

    Runs of characters that are not in ``alpha`` are copied through verbatim,
    so the surrounding delimiters and non-Hindi text keep their positions.
    """
    from itertools import groupby

    pieces = []
    for is_hindi, run in groupby(line, key=lambda ch: ch in alpha):
        segment = "".join(run)
        pieces.append(transliterator.transliterate(segment) if is_hindi else segment)
    return "".join(pieces)


def transliterate_csv(path_in, path_out):
    """Copy a text/CSV file, transliterating Hindi words to English.

    The first line of ``path_in`` is copied unchanged. In every following
    line, each consecutive run of characters found in ``getHindiAlphabet()``
    is replaced by its polyglot transliteration; all other characters are
    kept as they are. An empty input file produces an empty output file.

    Args:
        path_in: Path of the UTF-8 encoded input file.
        path_out: Path of the file to write; it is created or overwritten and
            written as UTF-8, matching the input encoding.
    """
    transliterator = Transliterator(source_lang="hi", target_lang="en")
    alpha = getHindiAlphabet()

    with open(path_in, "r", encoding='utf-8') as in_file:
        lines = in_file.read().splitlines()

    # Slicing (rather than indexing) keeps an empty input from raising.
    res = lines[:1]
    res.extend(
        _transliterate_hindi_runs(line, alpha, transliterator)
        for line in lines[1:]
    )

    # Explicit encoding: the pass-through text may contain non-ASCII
    # characters that the platform default encoding cannot represent.
    with open(path_out, "w", encoding='utf-8') as out_file:
        out_file.writelines("{}\n".format(line) for line in res)

Example #5

0

Show file

File: text.py Project: elyte5star/Entity-Extraction-using-Polyglot-from-XML-Corpus

 def transliterate(self, target_language="en"):
     """Return ``self.string`` transliterated into ``target_language``.

     The source language is taken from ``self.language``.
     """
     engine = Transliterator(
         source_lang=self.language,
         target_lang=target_language,
     )
     transliterated = engine.transliterate(self.string)
     return transliterated

Example #6

0

Show file

File: HindiChatterBot.py Project: Raphaeal19/HindiTalkBot-Corpus

def transliterationLang(textString):
    """Transliterate ``textString`` from English to Hindi, print it, and return it."""
    data = Transliterator(
        source_lang="en",
        target_lang="hi",
    ).transliterate(textString)
    print("transliterationLang: ", data)
    return data

Example #7

0

Show file

File: core_nlp.py Project: pranavsingh321/analyser

def translate_language(source, destination, text):
    """Transliterate ``text`` from the ``source`` language into ``destination``.

    Despite its name, this function transliterates (re-spells the text in
    another script); it does not translate.

    Args:
        source: Language code of the input text, passed to polyglot as
            ``source_lang`` (e.g. "en").
        destination: Language code to transliterate into, passed to polyglot
            as ``target_lang`` (e.g. "ru").
        text: The text to transliterate.

    Returns:
        The result of ``Transliterator.transliterate(text)``.
    """
    # Honour the caller's language pair; the previous implementation ignored
    # both arguments and always transliterated English -> Russian.
    transliterator = Transliterator(source_lang=source, target_lang=destination)
    return transliterator.transliterate(text)

Example #8

0

Show file

File: text.py Project: indatalabs/polyglot

 def transliterate(self, target_language="en"):
   """Transliterate ``self.string`` from ``self.language`` into ``target_language``."""
   return Transliterator(
       source_lang=self.language, target_lang=target_language
   ).transliterate(self.string)

Example #9

0

Show file

File: tu.py Project: devilsocket/function_vault

from polyglot.transliteration import Transliterator
# Module-level English -> Urdu transliterator, created once when the module is
# loaded and reused by transliterationAnalyzerUrduOne below.
trans = Transliterator(source_lang="en", target_lang="ur")


def transliterationAnalyzerUrduOne(txt):
    """Transliterate each whitespace-separated word of ``txt`` with ``trans``.

    Returns a dict whose single key ``"result"`` holds the transliterated
    words joined by single spaces.
    """
    converted_words = [trans.transliterate(word) for word in txt.split()]
    return {"result": ' '.join(converted_words)}

Example #10

0

Show file

File: day6_polyglot.py Project: enliktjioe/28daysofnlp

# `word` is not assigned anywhere above in this excerpt; it is assumed to be
# defined earlier in the script.
# Print each neighbour of `word`, left-aligned and padded to 16 characters.
for w in word.neighbors:
    print("{:<16}".format(w))
# Report the size of the vector's first axis, then show its first 10 entries.
print("\n\nThe first 10 dimensions out the {} dimensions\n".format(word.vector.shape[0]))
print(word.vector[:10])


# ## Morphology

# Rebind `word` to the first word of the sentence and print its morphemes.
word = Text("Preprocessing is an essential step.").words[0]
print(word.morphemes)


# ## Transliteration

# Transliterate a single English word into Russian and print the result.
from polyglot.transliteration import Transliterator
transliterator = Transliterator(source_lang="en", target_lang="ru")
print(transliterator.transliterate(u"preprocessing"))


# # Introduction to Natural Language Processing with Polyglot [3]

# Dependencies
# Each call runs a `polyglot download <model>` shell command through
# get_ipython().system(...).
# NOTE(review): get_ipython() is presumably only available inside an
# IPython/Jupyter session — confirm before running this as a plain script.
get_ipython().system('polyglot download embeddings2.en')
get_ipython().system('polyglot download ner2.en')
get_ipython().system('polyglot download sentiment2.en')
get_ipython().system('polyglot download pos2.en')
get_ipython().system('polyglot download morph2.en')
get_ipython().system('polyglot download transliteration2.ar')
get_ipython().system('polyglot download transliteration2.fr')

Example #11

0

Show file

File: transliterator.py Project: akiou/thai-language

# -*- coding: utf-8 -*-

from polyglot.text import Text
from polyglot.transliteration import Transliterator

__author__ = 'gree-gorey'
# English translation of the note below:
# "Use this if you want to transliterate a sentence."
"""
Это если хочешь транслитить предложение
"""

blob = u'รัตนกศรีสยาม เรืองนามจากแผ่นดินถึงถิ่นสวรรค์'
text = Text(blob)

# Print every item produced by transliterating the whole text into English.
for x in text.transliterate(target_language="en"):
    print(x)
# English translation of the note below:
# "And this is how to transliterate one word at a time. It does not
# transliterate more than a single word :("
"""
А так можно транслитить по одному слову. Больше слова не транслитит(
"""

transliterator = Transliterator(source_lang="th", target_lang="en")

# Call print as a function, as the loop above already does: the bare
# ``print expr`` statement form is a SyntaxError on Python 3.
print(transliterator.transliterate(u'เรืองนาม'))

Example #12

0

Show file

File: ZmaninmTranslit.py Project: AMWJ/JudaismBot

from polyglot.transliteration import Transliterator

# Reddit API credentials and client settings. Every credential below is an
# empty string in this file.
# NOTE(review): presumably these are filled in (or injected) before the bot is
# run — confirm how real values are supplied.
reddit_app_key = ""
reddit_app_secret = ""
reddit_user_name = ""
reddit_user_password = ""
reddit_user_agent = ""
subreddit_name = "Judaism"

# Reddit client built at import time from the settings above.
# NOTE(review): the imports for praw, Nominatim and TimezoneFinder are not
# visible in this excerpt — confirm they exist elsewhere in the file.
reddit = praw.Reddit(user_agent=reddit_user_agent,
                     client_id=reddit_app_key,
                     client_secret=reddit_app_secret,
                     username=reddit_user_name,
                     password=reddit_user_password)

# Shared helpers used by the functions below: Hebrew -> English
# transliterator, address geocoder, and coordinate -> timezone lookup.
transliterator = Transliterator(source_lang="he", target_lang="en")
# NOTE(review): Nominatim() is called without a user_agent; newer geopy
# releases may require one — verify against the installed version.
geolocator = Nominatim()
tf = TimezoneFinder()


def is_hebrew(term):
    """Return True if any character of ``term`` lies in U+0590..U+05EA (inclusive)."""
    for ch in term:
        if "\u0590" <= ch <= "\u05EA":
            return True
    return False


def transliterate(phrase):
    """Run ``phrase`` through the module-level ``transliterator`` and return the result."""
    romanized = transliterator.transliterate(phrase)
    return romanized


def get_zmanim(address):
    c = geolocator.geocode(address)
    timezone = tf.timezone_at(lng=c.longitude, lat=c.latitude)