def fetch_site():
    """Fetch a web page, convert its text, and rewrite its URLs for proxying.

    Query parameters read from ``request.args``:
        url: address of the page to download.
        source, target: passed through to ``convert`` and the post-processors.
        nativize, preOptions, postOptions: JSON text, decoded with
            ``json.loads`` before being handed to ``convert``.

    Returns:
        The converted HTML string in which root-relative and ``../`` paths
        have been made absolute against the fetched page, and every
        ``<a ... href=`` value has been prefixed with the website API URL so
        that followed links are converted with the same settings.

    Raises:
        ValueError: if one of the JSON parameters is malformed
            (``json.JSONDecodeError`` from ``json.loads``).
        Whatever ``request.args[...]`` raises for a missing parameter, and
        whatever ``requests.get`` raises for network errors or timeouts,
        propagate unchanged.
    """
    from urllib.parse import urlencode, urljoin, urlsplit

    args = request.args
    url = args['url']
    source = args['source']
    target = args['target']
    raw_nativize = args['nativize']
    raw_pre_options = args['preOptions']
    raw_post_options = args['postOptions']

    # Decode the JSON options before any network I/O so bad input fails fast.
    nativize = json.loads(raw_nativize)
    pre_options = json.loads(raw_pre_options)
    post_options = json.loads(raw_post_options)

    # NOTE(security): ``url`` is caller-controlled and is fetched from the
    # server, so this endpoint can be pointed at internal addresses (SSRF).
    # Restricting the allowed hosts needs a policy decision; flagged only.
    # The timeout keeps a stalled upstream from blocking this worker forever.
    r = requests.get(url, timeout=30)

    # ``r.encoding`` can be None when the response declares no charset, and
    # servers spell the charset in any case ("utf-8", "UTF-8").
    declared_encoding = r.encoding or ""
    if "UTF-8" not in declared_encoding.upper():
        r.encoding = r.apparent_encoding
    htmlcontent = r.text

    # Scheme and host of the fetched page, without a trailing slash.
    url_parts = urlsplit(url)
    baseurl = url_parts.scheme + "://" + url_parts.netloc
    site_root = baseurl + "/"

    htmlcontent = convert(
        source, target, htmlcontent, nativize, pre_options, post_options
    )

    # All substitutions below use a callable replacement: the inserted text
    # derives from request input, and a backslash in a replacement *template*
    # would be interpreted by ``re`` as an escape or group reference.

    # Replace root-relative paths with absolute ones. The lookaheads leave
    # protocol-relative URLs ("//host/...") and the '"/>' that ends a
    # self-closing tag untouched.
    htmlcontent = re.sub(
        r'(")/(?![/>])', lambda m: m.group(1) + site_root, htmlcontent
    )
    htmlcontent = re.sub(
        r'(url\()/(?!/)', lambda m: m.group(1) + site_root, htmlcontent
    )

    # Resolve leading "../" chains against the fetched page. ``urljoin``
    # handles pages at the site root, query strings containing "/", and
    # several "../" in a row. A "../" in the middle of a path (preceded by
    # "/" or ".") is left alone.
    htmlcontent = re.sub(
        r'(?<![/.])(?:\.\./)+',
        lambda m: urljoin(url, m.group(0)),
        htmlcontent,
    )

    # Prefix that sends a link back through this API with the same settings.
    # The values are URL-encoded so "&", "#", "+", "%" or spaces inside an
    # option cannot break the query string, then HTML-escaped because the
    # prefix is placed inside an attribute value.
    params = urlencode([
        ('source', source),
        ('target', target),
        ('preOptions', raw_pre_options),
        ('postOptions', raw_post_options),
        ('nativize', raw_nativize),
    ])
    transurl = html.escape(
        "http://aksharamukha.appspot.com/api/website?" + params + '&url='
    )

    # Rewrite every anchor in one pass, so no link can be prefixed twice.
    # "[^>]*?" keeps the match inside a single tag, and the optional quote
    # (either kind) stays in front of the inserted prefix.
    # NOTE: the original href is appended as-is after "url=", so a target
    # that itself contains "&" or "#" is still cut short by the browser.
    htmlcontent = re.sub(
        r'''(<a\s(?:[^>]*?\s)?href=["']?)''',
        lambda m: m.group(1) + transurl,
        htmlcontent,
    )

    # Post-processing passes from PostProcess (numerals, then dandas).
    htmlcontent = PostProcess.RetainIndicNumerals(htmlcontent, target, True)
    htmlcontent = PostProcess.RetainDandasIndic(htmlcontent, target, True)

    return htmlcontent