Python split 예제들, html.split Python 예제들

예제 #1

0

파일 보기

파일: hemeroteca.py 프로젝트: emijrp/emijrp.github.io

def savetable(filename, tablemark, table):
    """Replace the region between two HTML comment markers inside a file.

    The text located between ``<!-- tablemark -->`` and
    ``<!-- /tablemark -->`` in *filename* is replaced by *table*; both
    markers are written back so the operation can be repeated.

    :param filename: path of the HTML file to rewrite in place
    :param tablemark: marker name used inside the HTML comments
    :param table: new content to put between the two markers
    :raises IndexError: if the closing marker is not present; the file is
        left untouched in that case
    """
    with open(filename, 'r') as f:
        html = f.read()

    # Compute the new content BEFORE reopening the file for writing: opening
    # with 'w' truncates it, so a failure here must not lose the original.
    before = html.split(u'<!-- %s -->' % tablemark)[0]
    after = html.split(u'<!-- /%s -->' % tablemark)[1]
    html = u'%s<!-- %s -->%s<!-- /%s -->%s' % (before, tablemark, table, tablemark, after)

    with open(filename, 'w') as f:
        f.write(html)

예제 #2

0

파일 보기

파일: hemeroteca.py 프로젝트: bennylangston/emijrp.github.io

def savetable(filename, tablemark, table):
    """Rewrite *filename*, swapping the content enclosed by a marker pair.

    Everything between ``<!-- tablemark -->`` and ``<!-- /tablemark -->`` is
    replaced by *table*. The markers themselves are preserved.

    :param filename: path of the HTML file, updated in place
    :param tablemark: name that appears inside the opening/closing comments
    :param table: replacement content for the marked region
    :raises IndexError: when the closing marker cannot be found; nothing is
        written to the file in that case
    """
    with open(filename, 'r') as f:
        html = f.read()

    # The new document is built first; the file is only opened for writing
    # (which truncates it) once the markers are known to be usable.
    before = html.split(u'<!-- %s -->' % tablemark)[0]
    after = html.split(u'<!-- /%s -->' % tablemark)[1]
    html = u'%s<!-- %s -->%s<!-- /%s -->%s' % (before, tablemark, table,
                                               tablemark, after)

    with open(filename, 'w') as f:
        f.write(html)

예제 #3

0

파일 보기

파일: test.py 프로젝트: healingdrawing/data

def test_animaunttv():
    """Fetch a fixed animaunt.tv page and extract its episode counter.

    The text between the "Эпизоды:" label and the following "из" is printed
    as an integer (so a non-numeric value raises) and returned as the raw
    string slice.
    """
    page_url = "https://animaunt.tv/1794-vanpanchmen-2-sezon.html"
    page = gethtml(page_url, True)
    # Keep only what follows the episodes label ...
    after_label = page.split('<li class="vis"><span>Эпизоды:</span>', 1)[1]
    # ... and cut at the first "из" separator.
    episodes = after_label.split("из", 1)[0]
    print(int(episodes))
    return episodes

예제 #4

0

파일 보기

    def page_not_found(msg):
        """Answer a 404: JSON for API-like paths, the SPA page otherwise.

        For any other path the SPA HTML is returned with head tags injected
        just before ``</head>``: the request-specific tags first, then every
        default tag whose ``name``/``property`` value is not already used by
        a request-specific tag.
        """
        api_prefixes = ("/api", "/.well-known", "/feeds", "/oauth/authorize")
        if request.path.startswith(api_prefixes):
            return jsonify({"error": "page not found"}), 404

        spa_html = get_spa_html(app.config["REEL2BITS_SPA_HTML"])
        head, tail = spa_html.split("</head>", 1)

        request_tags = get_request_head_tags(request)

        default_tags = get_default_head_tags(request.path)
        unique_attributes = ("name", "property")

        # Same list object as request_tags: defaults are appended to it.
        final_tags = request_tags
        # Values of the identifying attributes already taken by request tags.
        taken = [
            tag[attr]
            for tag in final_tags
            for attr in unique_attributes
            if attr in tag
        ]

        for tag in default_tags:
            if not any(tag.get(attr) in taken for attr in unique_attributes):
                final_tags.append(tag)

        rendered = "\n".join(render_tags(final_tags))
        head = head + "\n" + rendered + "\n</head>"
        return head + tail

예제 #5

0

파일 보기

파일: __asena.py 프로젝트: yusufusta/AsenaUserBot

async def translate_to_msg(text_msg, to):
    """Translate *text_msg* through the Google Translate mobile page.

    :param text_msg: text to translate (source language is auto-detected)
    :param to: target language code, sent as the ``tl`` query parameter
    :returns: the raw HTML fragment found inside the ``result-container``
        element of the response
    :raises IndexError: if the response has no ``result-container`` element
    """
    # Pass the query as ``params`` so the HTTP client percent-encodes it.
    # Interpolating the text straight into the URL breaks as soon as it
    # contains characters such as "&", "#" or "+".
    query = {
        "hl": "auto",
        "sl": "auto",
        "tl": str(to),
        "ie": "UTF-8",
        "prev": "_m",
        "q": str(text_msg),
    }
    # NOTE(review): TLS certificate verification is disabled here, as in the
    # original code - confirm this is really required.
    connector = aiohttp.TCPConnector(verify_ssl=False)
    async with aiohttp.ClientSession(connector=connector) as session:
        async with session.get("https://translate.google.com/m", params=query) as response:
            html = await response.text()
            fin = html.split('result-container">')[1].split('</div>')[0]
            return fin

예제 #6

0

파일 보기

파일: page.py 프로젝트: postmarketOS-mirror/postmarketos.org

def grid(html):
    """ Turn the grid markers of a blog post into <div class="..."> and
        </div> tags. See README.md and static/code/blog-post.css for more
        information. Recognized markers:
        - "[#grid side#]"
        - "[#grid text#]"
        - "[#grid bottom#]"
        - "[#grid end#]"
        :param html: blog post code (already converted from markdown to HTML)
        :returns: html with all markers replaced
        :raises ValueError: on an end marker without an open grid, or when a
                            grid is still open at the end of the input """
    section_names = ("side", "text", "bottom")
    end_marker = "end#]"
    output = ""
    grid_open = False

    for chunk in html.split("[#grid "):
        # Any marker met while a grid is open closes the previous section
        if grid_open:
            # Do not leave a dangling "<p>" right before the "</div>"
            if output.endswith("<p>"):
                output = output[:-3]
            output += "</div>"

        matched = next(
            (name for name in section_names if chunk.startswith(name + "#]")),
            None)
        consumed = 0

        if matched is not None:
            # Section marker: open the grid wrapper first if necessary
            if not grid_open:
                output += '<div class="grid">'
                grid_open = True
            output += '<div class="grid-' + matched + '">'
            consumed = len(matched + "#]")
        elif chunk.startswith(end_marker):
            # End marker: close the grid wrapper itself
            if not grid_open:
                raise ValueError("[#grid end#] found before it was opened!")
            output += "</div>"
            grid_open = False
            consumed = len(end_marker)

        # Drop the marker text, then a "</p>" that would directly follow
        # the tag that was just written
        remainder = chunk[consumed:]
        if remainder.startswith("</p>"):
            remainder = remainder[4:]

        output += remainder

    # A grid that was opened must have been closed explicitly
    if grid_open:
        raise ValueError("Missing [#grid end#]!")
    return output

예제 #7

0

파일 보기

def serve_spa(request):
    """Return the SPA HTML page with rendered tags inserted before </head>.

    The tag collection is currently empty, so only whatever
    ``render_tags`` yields for an empty mapping is injected.
    """
    spa_html = get_spa_html(settings.STOCKAZIO_SPA_HTML_ROOT)
    head, tail = spa_html.split("</head>", 1)

    final_tags = {}

    # Re-add the closing tag consumed by the split, after the injected tags.
    rendered = "\n".join(render_tags(final_tags))
    head = head + "\n" + rendered + "\n</head>"

    return http.HttpResponse(head + tail)

예제 #8

0

파일 보기

def serve_spa(request):
    """Serve the SPA HTML page with instance-specific tweaks applied.

    Steps, in order: optionally rewrite the title and manifest URL in the
    head, inject request-specific and default head tags, append the custom
    CSS (if any) at the end of the body, and set a ``csrftoken`` cookie on
    the response.
    """
    spa_html = get_spa_html(settings.FUNKWHALE_SPA_HTML_ROOT)
    head, tail = spa_html.split("</head>", 1)

    if settings.FUNKWHALE_SPA_REWRITE_MANIFEST:
        manifest_url = settings.FUNKWHALE_SPA_REWRITE_MANIFEST_URL
        if not manifest_url:
            manifest_url = federation_utils.full_url(
                urls.reverse("api:v1:instance:spa-manifest"))
        instance_name = preferences.get("instance__name")
        if instance_name:
            head = replace_title(head, instance_name)
        head = replace_manifest_url(head, manifest_url)

    # When the API is not open, no custom data is exposed: the list stays
    # empty. Same when the route has no custom tags (Resolver404).
    request_tags = []
    if not preferences.get("common__api_authentication_required"):
        try:
            request_tags = get_request_head_tags(request) or []
        except urls.exceptions.Resolver404:
            pass

    default_tags = get_default_head_tags(request.path)
    unique_attributes = ("name", "property")

    # Same list object as request_tags: defaults get appended to it.
    final_tags = request_tags
    # Identifying attribute values already used by request-specific tags.
    taken = [
        tag[attr]
        for tag in final_tags
        for attr in unique_attributes
        if attr in tag
    ]

    for tag in default_tags:
        if not any(tag.get(attr) in taken for attr in unique_attributes):
            final_tags.append(tag)

    # Inject the meta tags and restore the "</head>" removed by the split.
    rendered = "\n".join(render_tags(final_tags))
    head = head + "\n" + rendered + "\n</head>"

    custom_css = get_custom_css() or ""
    if custom_css:
        # The style goes at the end of the body so it comes after the other
        # stylesheets and therefore gets the highest priority.
        body, remainder = tail.split("</body>", 1)
        style_tag = "<style>{}</style>".format(custom_css)
        tail = body + "\n" + style_tag + "\n</body>" + remainder

    # Set a csrf token so that the visitor can login / query the API.
    token = csrf.get_token(request)
    response = http.HttpResponse(head + tail)
    response.set_cookie("csrftoken", token, max_age=None)
    return response

예제 #9

0

파일 보기

def stockDetail(stockNum):
    """Fetch the quote line for *stockNum* and pick selected fields from it.

    The response is cut after the first ``="`` and split on commas; the
    returned tuple is built from fixed positions of that list.

    :param stockNum: symbol appended to the ``gb_`` prefix in the query
    :returns: tuple ``(name, current price, yesterday's price, max price,
        min price, change rate)`` taken from fields 0, 1, 26, 8, 7 and 2
    """
    global htmlHandle, header
    query_url = 'http://hq.sinajs.cn/?_=0.7342358745470643&list=gb_' + stockNum

    payload = htmlHandle.get(query_url, '')
    fields = payload.split('="')[1].split(',')
    # NOTE(review): max comes from index 8 and min from index 7 - confirm
    # against the feed's field layout.
    return (
        fields[0],   # stock name
        fields[1],   # current price
        fields[26],  # yesterday's price
        fields[8],   # max price
        fields[7],   # min price
        fields[2],   # change rate
    )

예제 #10

0

파일 보기

파일: to_sentences.py 프로젝트: hack4impact-upenn/pgp-suggestion-api

def clean_html(html):
    """Strip markup from *html* and join its blocks into sentences.

    Input containing both ``<`` and ``>`` is parsed with BeautifulSoup
    (any ``plist`` element is removed first); if parsing fails the text is
    obtained from ``remove_tags`` instead. Input without markup is used
    as-is. In every case, runs of three CRLF line breaks are replaced by
    ``". "``.

    :param html: HTML or plain text
    :returns: the cleaned text
    """
    if "<" in html and ">" in html:
        try:
            soup = BeautifulSoup(html, features="html.parser")
            plist = soup.find('plist')
            if plist:
                plist.decompose()  # plist content is not wanted in the text
            text = soup.getText()
        except Exception:
            # Best-effort fallback. ``Exception`` rather than a bare
            # ``except`` so KeyboardInterrupt/SystemExit still propagate.
            text = remove_tags(html)
    else:
        text = html
    return '. '.join(text.split("\r\n\r\n\r\n"))

예제 #11

0

파일 보기

    def getInternetSources(self, html):
        """Extract one internet source per search-result table in *html*.

        The page is split on the opening tag of the result tables; each
        piece is turned back into a full entry and handed to
        ``self._getInternetSourceFromEntry``. Entries whose extraction
        raises ``ValueError`` are logged and skipped.

        :returns: list of the successfully extracted sources
        """
        tableOpeningTag = '<table border="0" cellspacing="0" cellpadding="0" class="searchResultsTable">'
        # The first piece precedes the first table, so it is not an entry.
        entryBodies = html.split(tableOpeningTag)[1:]

        sources = []
        for body in entryBodies:
            # Restore the tag that the split consumed.
            entry = tableOpeningTag + body
            try:
                source = self._getInternetSourceFromEntry(entry)
                libLF.log('Got source! {}'.format(source.toNDJSON()))
                sources.append(source)
            except ValueError as err:
                libLF.log('Extraction failed: {}'.format(str(err)))
        return sources

예제 #12

0

파일 보기

def top_less(html, up):
    """Reduce the ``top`` value of a CSS declaration string.

    *html* is a ``;``-separated list of ``name:value`` declarations. The
    first declaration whose name contains ``top`` gets *up* subtracted from
    its value; the value is expected to be an integer followed by a
    two-character unit, and is always written back with ``px``. All other
    declarations are emitted unchanged, each terminated by ``;``.

    NOTE(review): the name test is a substring match, so a name such as
    ``margin-top`` also matches if it comes first - confirm this is wanted.

    :param html: CSS declarations, e.g. ``"top:10px;left:5px;"``
    :param up: amount to subtract from the ``top`` value
    :returns: the rebuilt declaration string
    :raises ValueError: if the matched value is not ``<integer><2 chars>``
    """
    declarations = html.split(";")
    # A string ending with ";" yields an empty last chunk: drop only that
    # one, so a final declaration without ";" is not lost.
    if not declarations[-1].strip():
        declarations.pop()

    rebuilt = []
    adjusted = False
    for declaration in declarations:
        # Split on the first ":" only, so values that contain ":" themselves
        # (e.g. "url(http://...)") are kept whole.
        name, colon, value = declaration.partition(":")
        if not adjusted and colon and "top" in name:
            value = str(int(value[:-2]) - up) + "px"
            adjusted = True  # only the first match is changed
        rebuilt.append(name + colon + value + ";")
    return "".join(rebuilt)

예제 #13

0

파일 보기

    def processFile(filename, reviews):
        """Parse the reviews of a .docx file and append them to *reviews*.

        The document is converted to HTML with mammoth and split on ``<p>``.
        Short paragraphs (fewer than 10 characters) may set the current
        rotation (``H``, ``M``, ``L`` or ``R/N``). Longer paragraphs are
        consumed in pairs: one accepted by ``ReviewParser.isNameString``
        starts a review, and the next one is parsed as its text and
        completes it.

        :param filename: path of the .docx file
        :param reviews: list that receives the ``review`` of each completed
            parser
        """
        style_map = "u => em"
        rotation_codes = ('H', 'M', 'L', 'R/N')

        with open(filename, "rb") as docx_file:
            result = mammoth.convert_to_html(docx_file, style_map=style_map)
            html = result.value  # The generated HTML

            current_rotation = None
            pending = None    # review whose name line was read, text pending
            previous = None   # most recently completed review

            for para in html.split('<p>'):
                # Drop the last four characters (presumably the closing
                # "</p>"), then normalize and turn en dashes into hyphens.
                text = para[:-4].strip()
                text = unicodedata.normalize('NFKC', text).replace(u'\u2013', '-')
                if not text or text[0] == '<':
                    continue

                if len(text) < 10:
                    # Short line: without its last character it may be a
                    # rotation code.
                    text = text[:-1].rstrip()
                    if text in rotation_codes:
                        current_rotation = text
                    continue

                if pending is not None:
                    # Second line of a pair: the review text.
                    pending.parseReviewString(text)
                    reviews.append(pending.review)
                    previous = pending
                    pending = None
                elif ReviewParser.isNameString(text):
                    pending = ReviewParser(filename, current_rotation)
                    pending.parseNameString(text)
                    previous = None
                elif previous is not None:
                    # Did somebody put a newline in the middle of a review?
                    # Try to add it to the last completed review.
                    previous.parseReviewString(text)

예제 #14

0

파일 보기

파일: formatter.py 프로젝트: zaneb/paste

def make_wrappable(html, wrap_limit=60, split_on=';?&@!$#-/\\"\''):
    """Insert ``<wbr>`` break hints into words longer than *wrap_limit*.

    Text no longer than *wrap_limit* is returned untouched. Otherwise it is
    split on whitespace and every over-long word is broken after the first
    occurrence of the first *split_on* character it contains, or, when it
    contains none, cut into *wrap_limit*-sized pieces. The words are
    joined back with single spaces.
    """
    # Currently using <wbr>, maybe should use &#8203;
    #   http://www.cs.tut.fi/~jkorpela/html/nobr.html
    if len(html) <= wrap_limit:
        return html

    def wrap_word(token):
        pieces = []
        while len(token) > wrap_limit:
            # split_on is ordered by priority: the first listed character
            # that occurs in the token wins.
            separator = next((c for c in split_on if c in token), None)
            if separator is None:
                # Nothing to break on: hard-cut into fixed-size pieces,
                # each followed by a break hint.
                pieces.extend(
                    token[start:start + wrap_limit] + '<wbr>'
                    for start in range(0, len(token), wrap_limit))
                token = ''
            else:
                leading, _, token = token.partition(separator)
                pieces.append(leading + separator + '<wbr>')
        pieces.append(token)
        return ''.join(pieces)

    return ' '.join(wrap_word(token) for token in html.split())