Example #1
0
def transliterate(args):
  """Print every word of the input next to its transliteration.

  Reads lines from ``args.input``, splits each one on whitespace and
  transliterates every word from ``args.lang`` into ``args.target``.
  Each word/transliteration pair is emitted as two left-aligned,
  16-character-wide columns; a blank line follows every input line.
  """
  engine = Transliterator(source_lang=args.lang,
                          target_lang=args.target)
  row_template = u"{:<16}{:<16}"
  for raw_line in args.input:
    rows = []
    for word in raw_line.strip().split():
      rows.append(row_template.format(word, engine.transliterate(word)))
    _print(u"\n".join(rows))
    # Blank separator between the output of consecutive input lines.
    _print(u"")
Example #2
0
def transliterate_to_hindi(sentence):
    """Transliterate an English sentence into Hindi script.

    ``transliterate_google`` is tried first and its result is returned
    unchanged (presumably a ``(text, source)`` tuple like the fallback --
    TODO confirm against that helper). If it raises, the sentence is
    transliterated token by token with polyglot instead.

    Side effects: increments the module-level ``total_count`` on every call
    and ``package_count`` every time the polyglot fallback is used.

    Returns:
        Whatever ``transliterate_google`` returns, or the tuple
        ``(transliterated_sentence, "polyglot")`` when the fallback ran.
    """
    global total_count, package_count
    total_count += 1
    try:
        return transliterate_google(sentence)
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate to the caller.
        package_count += 1
        english_hindi_transliterator = Transliterator(source_lang="en",
                                                      target_lang="hi")
        transliterated_sent = " ".join(
            english_hindi_transliterator.transliterate(token)
            for token in sentence.split(' '))
        return (transliterated_sent.strip(), "polyglot")
Example #3
0
def translit_part(string, idxs, verbose=False):
    """Transliterate ``string`` from Hindi to English one character at a time.

    Characters whose index is listed in ``idxs`` (the "haunted" indices) are
    looked up in the hand-made module-level ``translation_dict``; every other
    ("good") character is passed to the polyglot transliterator.

    Because the work is done per CHARACTER, the result is not the
    highest-quality transliteration of a name (e.g. it yields "ajy" instead
    of "ajay", which polyglot produces when given the whole word at once).

    Args:
        string: Text to transliterate.
        idxs: Iterable of indices into ``string`` that must use
            ``translation_dict`` instead of polyglot.
        verbose: When truthy, print a trace of every step.

    Returns:
        The concatenated transliteration.

    Raises:
        KeyError: If a character at a haunted index is missing from
            ``translation_dict``.
    """
    trans = Transliterator(source_lang='hi', target_lang='en')
    # Build the lookup once so each membership test is O(1).
    haunted = set(idxs)

    if verbose:
        print("\n RUNNING CLEAN TRANSLIT FUNCTION ON: ", string)
        print("INITIALIZED EMPTY STRING TO BECOME FINAL CLEAN TRANSLITERATION")

    pieces = []
    for i, char in enumerate(string):
        if verbose:
            print("EXAMINING CHAR ", char, "AT INDEX ", i)
        if i in haunted:
            if verbose:
                print("INDEX IS HAUNTED")
            piece = translation_dict[char]
        else:
            if verbose:
                print("INDEX IS CLEAR")
            # Transliterate once and reuse the result for the trace below.
            piece = trans.transliterate(char)
        pieces.append(piece)
        if verbose:
            print("ADDING", piece, "TO CLEAN TRANSLIT")

    clean_translit = ''.join(pieces)

    if verbose:
        print()
        print("FINAL CLEAN TRANSLIT: ", clean_translit)
        print()

    return clean_translit
Example #4
0
def _transliterate_hindi_runs(line, transliterator, alpha):
    """Return ``line`` with each maximal run of ``alpha`` characters transliterated."""
    from itertools import groupby

    pieces = []
    for is_hindi, run in groupby(line, key=lambda ch: ch in alpha):
        chunk = "".join(run)
        # Non-Hindi stretches (delimiters, digits, Latin text) pass through as-is.
        pieces.append(transliterator.transliterate(chunk) if is_hindi else chunk)
    return "".join(pieces)


def transliterate_csv(path_in, path_out):
    """Copy a UTF-8 text/CSV file, transliterating its Hindi text to English.

    The first line is copied unchanged (presumably a header row). In every
    following line each maximal run of characters found in
    ``getHindiAlphabet()`` is replaced by its polyglot transliteration, while
    all other characters are kept verbatim. An empty input file produces an
    empty output file.

    Args:
        path_in: Path of the UTF-8 encoded input file.
        path_out: Path of the output file; it is overwritten and written as
            UTF-8 so the result does not depend on the platform default
            encoding.
    """
    transliterator = Transliterator(source_lang="hi", target_lang="en")
    alpha = getHindiAlphabet()

    with open(path_in, "r", encoding='utf-8') as in_file:
        lines = in_file.read().splitlines()

    # ``lines[:1]`` keeps the first line untouched and is safe on empty input.
    res = lines[:1]
    res.extend(_transliterate_hindi_runs(line, transliterator, alpha)
               for line in lines[1:])

    with open(path_out, "w", encoding='utf-8') as out_file:
        for line in res:
            out_file.write("{}\n".format(line))
 def transliterate(self, target_language="en"):
     """Return ``self.string`` transliterated from ``self.language`` into ``target_language``."""
     return Transliterator(
         source_lang=self.language,
         target_lang=target_language,
     ).transliterate(self.string)
def transliterationLang(textString):
    """Transliterate English text into Hindi script, print it and return it.

    The result is echoed to stdout with a ``transliterationLang: `` prefix
    before being returned to the caller.
    """
    hindi_text = Transliterator(
        source_lang="en", target_lang="hi"
    ).transliterate(textString)
    print("transliterationLang: ", hindi_text)
    return hindi_text
Example #7
0
def translate_language(source, destination, text):
    """Transliterate ``text`` from the ``source`` language into ``destination``.

    Despite the name, this performs transliteration (script conversion) via
    polyglot, not translation.

    Args:
        source: Language code of ``text``. Falls back to ``"en"`` when empty
            or ``None``, which was the previously hard-coded value.
        destination: Target language code. Falls back to ``"ru"`` when empty
            or ``None``, which was the previously hard-coded value.
        text: The text to transliterate.

    Returns:
        The transliterated text as returned by ``Transliterator.transliterate``.
    """
    # The arguments used to be ignored in favour of a fixed en -> ru pair.
    transliterator = Transliterator(source_lang=source or "en",
                                    target_lang=destination or "ru")
    return transliterator.transliterate(text)
Example #8
0
 def transliterate(self, target_language="en"):
   """Convert this object's string from its own language into ``target_language``."""
   lang_pair = {"source_lang": self.language, "target_lang": target_language}
   converter = Transliterator(**lang_pair)
   return converter.transliterate(self.string)
Example #9
0
from polyglot.transliteration import Transliterator
# Module-level English -> Urdu transliterator, created once at import time
# and shared by transliterationAnalyzerUrduOne below.
trans = Transliterator(source_lang="en", target_lang="ur")


def transliterationAnalyzerUrduOne(txt):
    """Transliterate each whitespace-separated word of ``txt`` into Urdu.

    Returns a dict with a single ``"result"`` key holding the transliterated
    words joined by single spaces.
    """
    urdu_words = [trans.transliterate(token) for token in txt.split()]
    return {"result": ' '.join(urdu_words)}
Example #10
0
# NOTE(review): `word` is defined earlier in the notebook, outside this
# excerpt -- presumably a polyglot Word with embeddings loaded; confirm.
# Print each neighbour of the word left-aligned in a 16-character column.
for w in word.neighbors:
    print("{:<16}".format(w))
# Show the size of the word's vector followed by its first 10 components.
print("\n\nThe first 10 dimensions out the {} dimensions\n".format(word.vector.shape[0]))
print(word.vector[:10])


# ## Morphology

# Take the first word of the sentence and print its morphemes.
word = Text("Preprocessing is an essential step.").words[0]
print(word.morphemes)


# ## Transliteration

# Transliterate a single English word into Russian and print the result.
from polyglot.transliteration import Transliterator
transliterator = Transliterator(source_lang="en", target_lang="ru")
print(transliterator.transliterate(u"preprocessing"))


# # Introduction to Natural Language Processing with Polyglot [3]

# Dependencies
# Each call runs a `polyglot download <package>` shell command through
# IPython, so this section only works inside an IPython/Jupyter session.
get_ipython().system('polyglot download embeddings2.en')
get_ipython().system('polyglot download ner2.en')
get_ipython().system('polyglot download sentiment2.en')
get_ipython().system('polyglot download pos2.en')
get_ipython().system('polyglot download morph2.en')
get_ipython().system('polyglot download transliteration2.ar')
get_ipython().system('polyglot download transliteration2.fr')

Example #11
0
# -*- coding: utf-8 -*-

from polyglot.text import Text
from polyglot.transliteration import Transliterator

__author__ = 'gree-gorey'

# Use this approach when you want to transliterate a whole sentence:
# Text.transliterate yields one result per item, each printed on its own line.
blob = u'รัตนกศรีสยาม เรืองนามจากแผ่นดินถึงถิ่นสวรรค์'
text = Text(blob)

for x in text.transliterate(target_language="en"):
    print(x)

# Alternatively, transliterate one word at a time. Per the original author's
# note, the Transliterator does not handle more than a single word per call.
transliterator = Transliterator(source_lang="th", target_lang="en")

# Function-call form of print works on both Python 2 and Python 3; the
# previous Python 2 print statement was a SyntaxError on Python 3.
print(transliterator.transliterate(u'เรืองนาม'))
Example #12
0
from polyglot.transliteration import Transliterator

# Reddit API credentials and the subreddit to work with. The credential
# strings are empty in this file -- presumably they are meant to be filled in
# before running; confirm how deployment supplies them.
reddit_app_key = ""
reddit_app_secret = ""
reddit_user_name = ""
reddit_user_password = ""
reddit_user_agent = ""
subreddit_name = "Judaism"

# Reddit client built from the credentials above.
reddit = praw.Reddit(user_agent=reddit_user_agent,
                     client_id=reddit_app_key,
                     client_secret=reddit_app_secret,
                     username=reddit_user_name,
                     password=reddit_user_password)

# Shared module-level helpers: a Hebrew -> English transliterator, a geocoder
# (used via geolocator.geocode in get_zmanim) and a timezone lookup (used via
# tf.timezone_at in get_zmanim).
# NOTE(review): Nominatim and TimezoneFinder are not imported in the visible
# part of the file -- presumably geopy / timezonefinder; confirm.
transliterator = Transliterator(source_lang="he", target_lang="en")
geolocator = Nominatim()
tf = TimezoneFinder()


def is_hebrew(term):
    """Return True if any character of ``term`` lies in U+0590..U+05EA.

    The range is inclusive on both ends; an empty ``term`` yields False.
    """
    range_start, range_end = "\u0590", "\u05EA"
    for ch in term:
        if range_start <= ch <= range_end:
            return True
    return False


def transliterate(phrase):
    """Transliterate ``phrase`` with the shared module-level Hebrew -> English transliterator."""
    romanised = transliterator.transliterate(phrase)
    return romanised


def get_zmanim(address):
    c = geolocator.geocode(address)
    timezone = tf.timezone_at(lng=c.longitude, lat=c.latitude)