예제 #1
0
def fetch_site():
    """Fetch a remote web page and return its HTML converted between scripts.

    Reads these query-string arguments from the current request: ``url``
    (the page to fetch), ``source`` and ``target`` (passed to ``convert``;
    ``target`` is also passed to the ``PostProcess`` helpers), and
    ``nativize``, ``preOptions`` and ``postOptions`` (JSON-encoded values,
    decoded with ``json.loads`` before being handed to ``convert``).

    After conversion, root-relative paths and ``../`` references are rewritten
    against the fetched URL, and ``<a ... href=`` targets are prefixed with
    this service's own ``/api/website`` URL so followed links are converted
    with the same settings.

    Returns:
        The processed HTML as a string.

    Raises:
        KeyError: if a required query argument is missing (raised by the
            ``request.args[...]`` lookup).
        ValueError: if one of the JSON-encoded arguments cannot be decoded.
        requests.exceptions.RequestException: if the page cannot be fetched,
            including when the timeout expires (assuming ``requests`` is the
            Requests HTTP library).
    """
    # Read every argument up front so a missing one fails before any network I/O.
    args = request.args
    url = args['url']
    source = args['source']
    target = args['target']
    nativize = args['nativize']
    pre_options = args['preOptions']
    post_options = args['postOptions']

    # NOTE(security): `url` is caller-controlled and fetched server-side, so
    # this endpoint can be pointed at internal hosts (SSRF). Restrict the
    # allowed targets if that matters for the deployment.
    # The timeout keeps an unresponsive host from blocking this worker forever.
    r = requests.get(url, timeout=30)
    # r.encoding is None when the response declares no charset; guard against
    # that before the substring test and fall back to the detected encoding.
    if r.encoding is None or "UTF-8" not in r.encoding:
        r.encoding = r.apparent_encoding

    htmlcontent = r.text

    # Scheme + host of the fetched page, e.g. "https://example.com".
    baseurl = re.sub(r'(https*://)([^/]+)/*.*', r'\1\2', url, flags=re.IGNORECASE)
    # Drop the invisible character held in the literal below from the base URL
    # (it looks like a zero-width joiner -- TODO confirm).
    baseurl = baseurl.replace('‍', '')

    htmlcontent = convert(source, target, htmlcontent, json.loads(nativize),
                          json.loads(pre_options), json.loads(post_options))

    def append_after_match(text):
        # Build a replacement callable that re-emits group 1 followed by
        # `text`. A callable (unlike a template string) inserts `text`
        # literally, so backslashes coming from request data cannot be
        # misread as regex escapes or group references.
        return lambda match: match.group(1) + text

    # Replace relative paths with absolute paths
    absolute_root = append_after_match(baseurl + "/")
    htmlcontent = re.sub(r'(")/', absolute_root, htmlcontent)
    htmlcontent = re.sub(r'(\.")/', absolute_root, htmlcontent)
    htmlcontent = re.sub(r'(url\()/', absolute_root, htmlcontent)

    ## Parameters

    # Query string that carries the current conversion settings over to
    # every rewritten link; the link's original target is appended after
    # the trailing "url=".
    params = ('source=' + source + '&target=' + target
              + '&preOptions=' + pre_options
              + '&postOptions=' + post_options
              + '&nativize=' + nativize)

    transurl = html.escape("http://aksharamukha.appspot.com/api/website?" + params + '&url=')

    # fix double dot
    # "../" is replaced by the fetched URL minus its last two "/"-separated
    # segments, each kept segment followed by "/".
    urlparts = url.split("/")
    doubledot = "".join(part + "/" for part in urlparts[:-2])

    htmlcontent = htmlcontent.replace("../", doubledot)

    ## Replace links

    # Applied in this order; each pattern captures the start of an anchor tag
    # up to (and, where present, including) the opening quote of its href.
    link_prefix = append_after_match(transurl)
    for pattern in (
        r'(<a href="?)',
        r'(<a class=.*? href="?)',
        r'(<a target="_blank" href=")',
        r'(<a target="_self" href=")',
    ):
        htmlcontent = re.sub(pattern, link_prefix, htmlcontent)

    ## Replace with native numerals

    htmlcontent = PostProcess.RetainIndicNumerals(htmlcontent, target, True)

    ## Retain Dandas

    htmlcontent = PostProcess.RetainDandasIndic(htmlcontent, target, True)

    return htmlcontent
예제 #2
0
def conjuncts_list():
    """Return the conjunct tables of one script next to their form in another.

    Reads ``script1``, ``script2``, ``vowel`` and ``postoptions`` from the
    JSON request body, loads the matching file under
    ``resources/conjuncts<index>/`` and converts its text from ``script1``
    to ``script2`` with ``convert``.

    Returns:
        The result of ``jsonify`` on a dict with keys ``conjuncts1S1`` ..
        ``conjuncts5S1`` (entries from the resource file) and
        ``conjuncts1S2`` .. ``conjuncts5S2`` (the same entries after
        conversion to ``script2``).

    Raises:
        KeyError: if a request field or an expected ``conjuncts<n>S1`` key
            is missing.
        OSError: if the resource file cannot be opened.
        ValueError: if the file content, or its converted form, is not
            valid JSON.
    """
    payload = request.json
    script1 = payload['script1']
    script2 = payload['script2']
    vowel = payload['vowel']
    postoptions = payload['postoptions']

    print('The post options are :: ')
    print(postoptions)

    # The resource files are split over two directories: scripts whose first
    # three characters sort before 'Tir' live in conjuncts1, the rest in
    # conjuncts2.
    index = '1' if script1[0:3] < 'Tir' else '2'

    # For these two scripts, a post-option switches to the "_all" variant of
    # the resource file.
    all_variant_option = {
        'Sinhala': 'SinhalaConjuncts',
        'Chakma': 'ChakmaEnableAllConjuncts',
    }.get(script1)
    if all_variant_option is not None and all_variant_option in postoptions:
        suffix = '_all'
    else:
        suffix = ''

    # NOTE(review): script1 and vowel come straight from the request and are
    # concatenated into a file path -- consider validating them against the
    # known script/vowel names.
    path = ('resources/conjuncts' + index + '/conjuncts_' + script1
            + '_' + vowel + suffix + '.json')

    # The context manager closes the handle even if the read fails.
    with open(path, 'r', encoding='utf-8') as f:
        conjuncts = f.read()
    # Normalise Arabic commas to ASCII commas and strip spaces so the text
    # stays parseable as JSON.
    conjuncts = conjuncts.replace('،', ',').replace(" ", "")

    # The raw JSON text (not the parsed object) is converted, then parsed.
    conjuncts_guide = convert(script1, script2, conjuncts, False, [], [])
    if script2 != 'Velthuis':
        conjuncts_guide = PostProcess.RetainIndicNumerals(conjuncts_guide, script2, True)
        conjuncts_guide = json.loads(conjuncts_guide.replace('،', ','))
    else:
        # Backslash-escape a double quote that directly precedes another
        # double quote or follows '&' -- presumably because Velthuis output
        # uses '"' as a letter, which would otherwise break the JSON.
        conjuncts_guide = conjuncts_guide.replace('""', '"\\"').replace('&"', '&\\"')
        conjuncts_guide = json.loads(PostProcess.RetainIndicNumerals(conjuncts_guide, script2, True))

    conjuncts = json.loads(conjuncts)

    results = {}

    # Source-script entries first, then their converted counterparts; the
    # converted document still uses the "...S1" key names.
    for n in range(1, 6):
        key = 'conjuncts{}S1'.format(n)
        results[key] = conjuncts[key]

    for n in range(1, 6):
        results['conjuncts{}S2'.format(n)] = conjuncts_guide['conjuncts{}S1'.format(n)]

    return jsonify(results)