def transliterate(data, _from=None, _to=None, scheme_map=None, **kw):
    """Convert `data` from one scheme to another::

        output = transliterate('idam adbhutam', HK, DEVANAGARI)

    When no `scheme_map` is supplied, a fresh :class:`SchemeMap` is built on
    every call from the schemes registered under `_from` and `_to`. Callers
    that convert repeatedly can build the map once and pass it in instead::

        scheme_map = SchemeMap(SCHEMES[HK], SCHEMES[DEVANAGARI])
        output = transliterate('idam adbhutam', scheme_map=scheme_map)

    :param data: the data to transliterate
    :param _from: key of the source scheme in ``SCHEMES``; only consulted
                  when `scheme_map` is not given
    :param _to: key of the destination scheme in ``SCHEMES``; only consulted
                when `scheme_map` is not given
    :param scheme_map: a ready-made :class:`SchemeMap`. When given, it is
                       used as-is and `_from` / `_to` are ignored.
    :param kw: extra keyword arguments; accepted but not used
    :return: the result of ``sanscript.transliterate`` for `data`
    """
    # Fast path: the caller already paid for building the map.
    if scheme_map is not None:
        return sanscript.transliterate(data=data, scheme_map=scheme_map)

    source = SCHEMES[_from]
    target = SCHEMES[_to]
    built_map = sanscript.SchemeMap(source, target)
    return sanscript.transliterate(data=data, scheme_map=built_map)
def s_to_d(input_string):
    """Transliterate SLP1 text to Devanagari, printing a trace to stdout.

    The encoding reported by ``detect.detect`` is printed for both the input
    and the result. It is informational only: the text is always converted
    with an SLP1 -> Devanagari scheme map, whatever was detected.

    :param input_string: text to convert
    :return: the transliterated text
    """

    def _report(label, text):
        # Detect first, then print, so the trace line carries the encoding.
        encoding = detect.detect(text)
        print(label + text + " --- encoding = " + str(encoding))

    _report("input: ", input_string)

    slp1_to_deva = sanscript.SchemeMap(
        sanscript.SCHEMES[sanscript.SLP1],
        sanscript.SCHEMES[sanscript.DEVANAGARI],
    )
    converted = sanscript.transliterate(input_string, scheme_map=slp1_to_deva)

    _report("output: ", converted)
    return converted
def v_to_d(input_string):
    """Convert Velthuis-encoded text to Devanagari.

    The encoding of `input_string` is detected with ``detect.detect``. Text
    detected as anything other than Velthuis is returned unchanged.

    :param input_string: text to convert
    :return: the Devanagari transliteration when the input is detected as
             Velthuis, otherwise `input_string` itself
    """
    if str(detect.detect(input_string)) != "Velthuis":
        return input_string

    velthuis_to_deva = sanscript.SchemeMap(
        sanscript.SCHEMES[sanscript.VELTHUIS],
        sanscript.SCHEMES[sanscript.DEVANAGARI],
    )
    # The output is deliberately not run through detect.detect again: the
    # result of that second pass was never used.
    return sanscript.transliterate(input_string, scheme_map=velthuis_to_deva)
def i_to_d(input_string):
    """Convert ITRANS- or HK-encoded text to Devanagari.

    The encoding of `input_string` is detected with ``detect.detect``. Text
    detected as neither ITRANS nor HK is returned unchanged.

    :param input_string: text to convert
    :return: the Devanagari transliteration when the input is detected as
             ITRANS or HK, otherwise `input_string` itself
    """
    detected = str(detect.detect(input_string))

    # Pick the source scheme key matching the detected encoding.
    if detected == "ITRANS":
        source_key = sanscript.ITRANS
    elif detected == "HK":
        source_key = sanscript.HK
    else:
        return input_string

    to_devanagari = sanscript.SchemeMap(
        sanscript.SCHEMES[source_key],
        sanscript.SCHEMES[sanscript.DEVANAGARI],
    )
    return sanscript.transliterate(input_string, scheme_map=to_devanagari)
def d_to_i(input_string):
    """Convert Devanagari text to ITRANS.

    The encoding of `input_string` is detected with ``detect.detect``. Text
    detected as anything other than Devanagari is returned unchanged.

    :param input_string: text to convert
    :return: the ITRANS transliteration when the input is detected as
             Devanagari, otherwise `input_string` itself
    """
    if str(detect.detect(input_string)) != "Devanagari":
        return input_string

    deva_to_itrans = sanscript.SchemeMap(
        sanscript.SCHEMES[sanscript.DEVANAGARI],
        sanscript.SCHEMES[sanscript.ITRANS],
    )
    # The output is deliberately not run through detect.detect again: the
    # result of that second pass was never used.
    return sanscript.transliterate(input_string, scheme_map=deva_to_itrans)
def main():
    """Command-line entry point: run the pragrahya check on ``sys.argv[1]``.

    The first command-line argument must be detected as Devanagari by
    ``detect.detect``. It is transliterated to Velthuis and handed to
    ``pragrahya_check``; the verdict is printed to stdout.

    If the argument is missing, or is not detected as Devanagari, a prompt
    asking for Devanagari input is printed and the function returns without
    running the check.
    """
    if len(sys.argv) < 2:
        # No text supplied: prompt instead of failing with IndexError.
        print("please enter the input in devanagari")
        return

    raw_text = sys.argv[1]
    print("input:" + raw_text)
    scheme = detect.detect(raw_text)
    print("input: " + raw_text + " --- encoding = " + str(scheme))

    if str(scheme) != "Devanagari":
        # Rejected input is not checked: running pragrahya_check on an empty
        # string would only print a meaningless verdict.
        print("please enter the input in devanagari")
        return

    deva_to_velthuis = sanscript.SchemeMap(
        sanscript.SCHEMES[sanscript.DEVANAGARI],
        sanscript.SCHEMES[sanscript.VELTHUIS],
    )
    velthuis_text = sanscript.transliterate(raw_text, scheme_map=deva_to_velthuis)

    is_pragrahya = pragrahya_check(velthuis_text)
    print("**Pragrahya " + str(is_pragrahya))
import string
import time
from collections import OrderedDict
from concurrent.futures import ThreadPoolExecutor
from functools import reduce

import requests
from indic_transliteration import sanscript
from lxml import etree
from requests import Session

# Base URL of the per-letter listing pages; a letter is appended as the last
# path segment (see get_listing_pages).
LISTINGS_BASE_URI = 'http://eoh.rkmathbangalore.org/listing/alphabet'
# NOTE(review): presumably the base URL of individual word pages -- it is not
# used in this section; confirm against the rest of the file.
WORD_PAGE_BASE_URI = 'http://eoh.rkmathbangalore.org/describe/word'

# NOTE(review): presumably the worker count for the ThreadPoolExecutor
# imported above -- it is not used in this section; confirm.
concurrency = 10

# One scheme map per target script, each converting from IAST, in this order:
# Devanagari, ITRANS, Tamil, Telugu. Built once at import time for reuse.
HWS_XLITERATE_SCHEME_MAPS = [
    sanscript.SchemeMap(sanscript.SCHEMES[sanscript.IAST], sanscript.SCHEMES[script])
    for script in [
        sanscript.DEVANAGARI,
        sanscript.ITRANS,
        sanscript.TAMIL,
        sanscript.TELUGU,
    ]
]


def get_listing_pages():
    """Yield the response text of the listing page for each letter A-Z.

    This is a generator: each page is requested with ``requests.get`` only
    when the consumer asks for the next item. No timeout or HTTP status
    check is applied to the requests.
    """
    # we are getting all once for proper hyperlinking
    for alphabet in string.ascii_uppercase:
        yield requests.get('{}/{}'.format(LISTINGS_BASE_URI, alphabet)).text


def _get_class_matching_xpath(tag: str, clsname: str) -> str:
    """Build an XPath selecting `tag` elements that carry class `clsname`.

    The class attribute is whitespace-normalised and padded with spaces so
    that `clsname` is matched as a whole space-delimited token rather than
    as a substring of another class name. Both arguments are interpolated
    verbatim, with no escaping.

    :param tag: element name to select
    :param clsname: class token that must appear in the ``class`` attribute
    :return: the XPath expression as a string
    """
    return "//{tag}[contains(concat(' ', normalize-space(@class), ' '), ' {cls} ')]".format(
        tag=tag, cls=clsname)