def savetable(filename, tablemark, table):
    # Replace whatever sits between "<!-- mark -->" and "<!-- /mark -->" in an
    # existing HTML file with a freshly rendered table.
    with open(filename, 'r') as f:
        html = f.read()
    before = html.split(u'<!-- %s -->' % tablemark)[0]
    after = html.split(u'<!-- /%s -->' % tablemark)[1]
    html = u'%s<!-- %s -->%s<!-- /%s -->%s' % (before, tablemark, table, tablemark, after)
    # Only open for writing once both markers were found, so a missing marker
    # raises IndexError above instead of truncating the file first.
    with open(filename, 'w') as f:
        f.write(html)
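# Minimal usage sketch for savetable (the file name, marker name, and table
# markup below are invented for illustration): the target file must already
# contain a matched "<!-- mark -->...<!-- /mark -->" pair, and only the text
# between the markers is replaced.
with open('demo.html', 'w') as f:
    f.write('<body><!-- stats -->old rows<!-- /stats --></body>')
savetable('demo.html', 'stats', '<table><tr><td>new rows</td></tr></table>')
# demo.html now contains:
# <body><!-- stats --><table><tr><td>new rows</td></tr></table><!-- /stats --></body>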
def test_animaunttv():
    url = "https://animaunt.tv/1794-vanpanchmen-2-sezon.html"
    html = gethtml(url, True)
    # Grab the episode counter between '<li class="vis"><span>Эпизоды:</span>'
    # ("Episodes:") and "из" ("of"), e.g. "12 из 24" -> "12".
    html = html.split('<li class="vis"><span>Эпизоды:</span>', 1)[1].split("из", 1)[0]
    print(int(html))
    return html
def page_not_found(msg):
    excluded = ["/api", "/.well-known", "/feeds", "/oauth/authorize"]
    if any(request.path.startswith(m) for m in excluded):
        return jsonify({"error": "page not found"}), 404
    html = get_spa_html(app.config["REEL2BITS_SPA_HTML"])
    head, tail = html.split("</head>", 1)
    request_tags = get_request_head_tags(request)
    default_tags = get_default_head_tags(request.path)
    unique_attributes = ["name", "property"]
    final_tags = request_tags
    skip = []
    # Request-specific tags win: remember which name/property values they use...
    for t in final_tags:
        for attr in unique_attributes:
            if attr in t:
                skip.append(t[attr])
    # ...and only add the default tags that do not collide with them.
    for t in default_tags:
        existing = False
        for attr in unique_attributes:
            if t.get(attr) in skip:
                existing = True
                break
        if not existing:
            final_tags.append(t)
    head += "\n" + "\n".join(render_tags(final_tags)) + "\n</head>"
    return head + tail
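# Condensed illustration of the merge rule above, with invented tag dicts:
# a default tag is dropped whenever a request tag already claims the same
# "name" or "property" value.
request_tags = [{"name": "description", "content": "this page"}]
default_tags = [
    {"name": "description", "content": "site-wide default"},
    {"property": "og:site_name", "content": "reel2bits"},
]
skip = {t[a] for t in request_tags for a in ("name", "property") if a in t}
merged = request_tags + [
    t for t in default_tags
    if not any(t.get(a) in skip for a in ("name", "property"))
]
# merged keeps the request description and adds only the og:site_name tag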
async def translate_to_msg(text_msg, to):
    # Pass the query as params so aiohttp URL-encodes it; interpolating
    # text_msg into the URL breaks on messages containing "&", "#" or spaces.
    params = {"hl": "auto", "sl": "auto", "tl": to, "ie": "UTF-8", "prev": "_m", "q": text_msg}
    async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(verify_ssl=False)) as session:
        async with session.get("https://translate.google.com/m", params=params) as response:
            html = await response.text()
    # The translation sits in the first <div class="result-container"> element.
    return html.split('result-container">')[1].split('</div>')[0]
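# translate_to_msg is a coroutine, so it needs an event loop to run; a minimal
# driver, assuming "fr" as an example target-language code:
import asyncio

async def _demo_translate():
    translated = await translate_to_msg("Hello, world", "fr")
    print(translated)

asyncio.run(_demo_translate())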
def grid(html):
    """
    Replace the following markers with appropriate <div class="..."> and </div> tags.
    See README.md and static/code/blog-post.css for more information.

    - "[#grid side#]"
    - "[#grid text#]"
    - "[#grid bottom#]"
    - "[#grid end#]"

    :param html: blog post code (already converted from markdown to HTML)
    :returns: html with all markers replaced
    """
    sections = ["side", "text", "bottom"]
    ret = ""
    in_grid = False
    for word in html.split("[#grid "):
        # Continue or start grid
        if in_grid:
            # Avoid "<p></div>"
            if ret[-3:] == "<p>":
                ret = ret[:-3]
            ret += "</div>"
        # New grid section
        tag_found = ''
        for section in sections:
            tag = section + "#]"
            if word.startswith(tag):
                tag_found = tag
                if not in_grid:
                    ret += '<div class="grid">'
                    in_grid = True
                ret += '<div class="grid-' + section + '">'
                break
        # End grid
        tag = "end#]"
        if word.startswith(tag):
            if not in_grid:
                raise ValueError("[#grid end#] found before it was opened!")
            tag_found = tag
            ret += "</div>"
            in_grid = False
        # Remove tag from word
        word = word[len(tag_found):]
        # Avoid "<div class=...></p>"
        if word[:4] == "</p>":
            word = word[4:]
        ret += word
    # Check for grids without end tag
    if in_grid:
        raise ValueError("Missing [#grid end#]!")
    return ret
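# Worked example for grid(), traced through the code above (input invented):
# section markers become nested divs, and the stray "</p>" left after each
# marker is dropped. Output shown wrapped across two comment lines.
sample = ('intro [#grid side#]<p>Note</p>'
          '[#grid text#]<p>Body</p>[#grid end#] outro')
print(grid(sample))
# -> intro <div class="grid"><div class="grid-side"><p>Note</p></div>
#    <div class="grid-text"><p>Body</p></div></div> outro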
def serve_spa(request):
    html = get_spa_html(settings.STOCKAZIO_SPA_HTML_ROOT)
    head, tail = html.split("</head>", 1)
    final_tags = {}
    # let's inject our meta tags in the HTML
    head += "\n" + "\n".join(render_tags(final_tags)) + "\n</head>"
    return http.HttpResponse(head + tail)
def serve_spa(request):
    html = get_spa_html(settings.FUNKWHALE_SPA_HTML_ROOT)
    head, tail = html.split("</head>", 1)
    if settings.FUNKWHALE_SPA_REWRITE_MANIFEST:
        new_url = (
            settings.FUNKWHALE_SPA_REWRITE_MANIFEST_URL
            or federation_utils.full_url(urls.reverse("api:v1:instance:spa-manifest"))
        )
        title = preferences.get("instance__name")
        if title:
            head = replace_title(head, title)
        head = replace_manifest_url(head, new_url)
    if not preferences.get("common__api_authentication_required"):
        try:
            request_tags = get_request_head_tags(request) or []
        except urls.exceptions.Resolver404:
            # we don't have any custom tags for this route
            request_tags = []
    else:
        # API is not open, we don't expose any custom data
        request_tags = []
    default_tags = get_default_head_tags(request.path)
    unique_attributes = ["name", "property"]
    final_tags = request_tags
    skip = []
    for t in final_tags:
        for attr in unique_attributes:
            if attr in t:
                skip.append(t[attr])
    for t in default_tags:
        existing = False
        for attr in unique_attributes:
            if t.get(attr) in skip:
                existing = True
                break
        if not existing:
            final_tags.append(t)
    # let's inject our meta tags in the HTML
    head += "\n" + "\n".join(render_tags(final_tags)) + "\n</head>"
    css = get_custom_css() or ""
    if css:
        # We add the style at the end of the body to ensure it has the highest
        # priority (since it will come after other stylesheets)
        body, tail = tail.split("</body>", 1)
        css = "<style>{}</style>".format(css)
        tail = body + "\n" + css + "\n</body>" + tail
    # set a csrf token so that visitor can login / query API if needed
    token = csrf.get_token(request)
    response = http.HttpResponse(head + tail)
    response.set_cookie("csrftoken", token, max_age=None)
    return response
def stockDetail(stockNum):
    global htmlHandle, header
    myurl = 'http://hq.sinajs.cn/?_=0.7342358745470643&list=gb_' + stockNum
    html = htmlHandle.get(myurl, '')
    # The quote payload is the comma-separated string inside the quotes of the
    # returned JavaScript assignment; pick out the fields by position.
    detail = html.split('="')[1].split(',')
    stockName = detail[0]
    nowPrice = detail[1]
    yesterdayPrice = detail[26]
    maxPrice = detail[8]
    minPrice = detail[7]
    changeRate = detail[2]
    return stockName, nowPrice, yesterdayPrice, maxPrice, minPrice, changeRate
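# The hq.sinajs.cn endpoint answers with a JavaScript assignment along the
# lines of: var hq_str_gb_aapl="Apple Inc,150.00,...";  (the exact field layout
# is an assumption here, values invented). The indexing above can be
# sanity-checked against a canned response:
sample = 'var hq_str_gb_aapl="Apple Inc,150.00,-1.20";'
fields = sample.split('="')[1].split(',')
print(fields[0], fields[1])  # -> Apple Inc 150.00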
def clean_html(html):
    if "<" in html and ">" in html:
        try:
            soup = BeautifulSoup(html, features="html.parser")
            plist = soup.find('plist')
            if plist:
                plist.decompose()  # remove plists because ugh
            text = soup.getText()
        except Exception:
            text = remove_tags(html)
        return '. '.join(text.split("\r\n\r\n\r\n"))
    else:
        return '. '.join(html.split("\r\n\r\n\r\n"))
def getInternetSources(self, html):
    internetSources = []
    stringAtBeginningOfEachEntry = '<table border="0" cellspacing="0" cellpadding="0" class="searchResultsTable">'
    entries = html.split(stringAtBeginningOfEachEntry)
    # Remove leading entry which does not contain a table
    entries.pop(0)
    for e in entries:
        fullEntry = stringAtBeginningOfEachEntry + e
        try:
            source = self._getInternetSourceFromEntry(fullEntry)
            libLF.log('Got source! {}'.format(source.toNDJSON()))
            internetSources.append(source)
        except ValueError as err:
            libLF.log('Extraction failed: {}'.format(str(err)))
    return internetSources
def top_less(html, up):  # Reduce the "top" property by `up` pixels
    htm_css_tab = html.split(";")  # pick out each CSS property
    htm_propriete_tab = []
    for css in htm_css_tab:
        propriete = css.split(":")
        htm_propriete_tab.append(propriete)
    for prop in htm_propriete_tab:
        if "top" in prop[0]:
            prop[1] = str(int(prop[1][:-2]) - up) + "px"
            break
    chaine = ""
    for i in range(len(htm_propriete_tab) - 1):
        chaine += str(htm_propriete_tab[i][0]) + ":" + str(htm_propriete_tab[i][1]) + ";"
    return chaine
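# Quick check of top_less on an inline style string (values invented):
# moving up by 20px turns "top:100px;left:5px;" into "top:80px;left:5px;".
print(top_less("top:100px;left:5px;", 20))  # -> top:80px;left:5px;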
def processFile(filename, reviews):
    style_map = "u => em"
    with open(filename, "rb") as docx_file:
        result = mammoth.convert_to_html(docx_file, style_map=style_map)
        html = result.value  # The generated HTML
    paras = html.split('<p>')
    currentRotation = None
    parsedReview = None
    lastParsedReview = None
    for p in paras:
        s = p[:-4].strip()  # drop the trailing "</p>"
        #s = s.replace(u'\xa0', ' ').replace(u'\u2013', '-')
        s = unicodedata.normalize('NFKC', s).replace(u'\u2013', '-')
        if len(s) > 0 and s[0] != '<':
            if len(s) < 10:
                # Short paragraphs are rotation markers
                s = s[:-1].rstrip()
                if s in ('H', 'M', 'L', 'R/N'):
                    currentRotation = s
                    albumName = None
                    albumReview = None
                    waitingForAlbum = True
            else:
                if parsedReview is None:
                    if ReviewParser.isNameString(s):
                        parsedReview = ReviewParser(filename, currentRotation)
                        parsedReview.parseNameString(s)
                        lastParsedReview = None
                    elif lastParsedReview is not None:
                        # did somebody put a newline in the middle of a review?
                        # Try to add it to the last parsedReview
                        lastParsedReview.parseReviewString(s)
                else:
                    parsedReview.parseReviewString(s)
                    reviews.append(parsedReview.review)
                    lastParsedReview = parsedReview
                    parsedReview = None
def make_wrappable(html, wrap_limit=60, split_on=';?&@!$#-/\\"\''):
    # Currently using <wbr>, maybe should use &#8203;
    # http://www.cs.tut.fi/~jkorpela/html/nobr.html
    if len(html) <= wrap_limit:
        return html
    words = html.split()
    new_words = []
    for word in words:
        wrapped_word = ''
        while len(word) > wrap_limit:
            for char in split_on:
                if char in word:
                    first, rest = word.split(char, 1)
                    wrapped_word += first + char + '<wbr>'
                    word = rest
                    break
            else:
                for i in range(0, len(word), wrap_limit):
                    wrapped_word += word[i:i + wrap_limit] + '<wbr>'
                word = ''
        wrapped_word += word
        new_words.append(wrapped_word)
    return ' '.join(new_words)
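# Example, traced through the code above: long unbroken tokens get a <wbr>
# break opportunity after each split_on character found (here, the "/").
print(make_wrappable("http://example.com/very/long/path", wrap_limit=10))
# -> http:/<wbr>/<wbr>example.com/<wbr>very/<wbr>long/path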