def parse_article(self, url):
    """Fetch *url* and return its "content" div converted to Markdown.

    The converted Markdown has every newline removed, reproducing the
    original single-line output.
    """
    page_text = requests.get(url=url, headers=self.headers).text
    soup = bs(page_text, features="lxml")
    content_div = soup.find("div", class_="content")
    markdown = Tomd(str(content_div)).markdown
    # Flatten the result onto one line, as the original did.
    return markdown.replace("\n", "")
def build_file(name):
    """Convert posts/<name>/index.html into Hexo-flavored Markdown.

    Reads the rendered article page, writes front matter (title, date,
    tags) followed by the article body, translating images, headings,
    iframes and highlighted code figures into Markdown/Hexo equivalents,
    and saves the result to posts/<name>.md.
    """
    path = "posts/" + name + "/index.html"
    # Bug fix: the handle was opened without an encoding (which can yield
    # garbage on non-ASCII pages) and was never closed. Use a context
    # manager with explicit UTF-8.
    with codecs.open(path, 'r', encoding="utf-8") as f:
        html = f.read()
    soup = BeautifulSoup(html, features="html5lib")
    # Front matter.
    md = "---"
    md += "\ntitle: " + soup.find("h1").get_text().strip()
    md += "\ndate: " + soup.find("time").get_text().replace("-", "/")
    md += "\ntags: " + soup.find("a", "tag-link").get_text()
    md += "\n---\n"
    content = soup.find(itemprop="articleBody")
    for child in content.children:
        if str(child)[:4] == '<img':
            # Keep only the file name; Hexo resolves it via asset_img.
            end_point = child['src'].rfind('/') + 1
            file_name = child['src'][end_point:]
            text = "{% asset_img " + file_name + " %}"
            md += "\n" + text + "\n"
        elif str(child)[:2] == '<h':
            # "<h3>…" -> "### …" and so on for other heading levels.
            num_pounds = "#" * int(str(child)[2])
            md += "\n" + num_pounds + " " + child.get_text() + "\n"
        elif str(child)[:3] == '<if':
            # <iframe> elements pass through verbatim.
            md += "\n" + str(child) + "\n"
        elif str(child)[:24] == '<figure class="highlight':
            # Highlighted code figure: recover the language tag and the
            # code text from the table Tomd leaves behind.
            code_sample = str(child)
            code_type = code_sample[25:code_sample.find('"', 24)]
            temp_md = Tomd(str(child)).markdown
            temp_md = temp_md[temp_md.find('<td class="code"'):]
            temp_md = BeautifulSoup(temp_md, features="html5lib").find("pre")
            pre_md = str(temp_md)
            pre_md = pre_md[5:-6]  # strip the "<pre>" / "</pre>" wrapper
            temp_md = "\n``` "
            temp_md += code_type + '\n'
            for i, char in enumerate(pre_md):
                # Insert a newline in front of each "<br/>" marker; the
                # markers themselves are stripped below.
                if pre_md[i:i + 5] == '<br/>':
                    temp_md += '\n'
                temp_md += char
            temp_md += '```'
            md += temp_md.replace('<br/>', '')
        else:
            md += Tomd(str(child)).markdown
    # Bug fix: explicit UTF-8 avoids a UnicodeEncodeError on platforms
    # whose default encoding cannot represent the article text.
    with open('posts/' + name + '.md', 'w', encoding="utf-8") as file:
        file.write(md)
def soup_snapshot_2md(soup):
    """Convert a scraped minecraft.net article page into a Markdown file.

    Extracts the header image, title, subtitle, article body, author and
    publish date from *soup*, chooses an output directory based on the
    kind of release named in the title, then writes the document via
    writedoc().
    """
    # Accumulated Markdown output.
    text = str()
    # Locate the relevant nodes.
    tags = soup.find_all(
        class_="page-section page-section--first article-body")[0]
    body_tag = soup.find_all("div", class_="end-with-block")[0]
    author_tag = soup.find_all("div", class_="attribution pb-3")[0]
    # Header image.
    img_tag = soup.find_all(class_="article-head__image-container")[0]
    img_url = f"![]({'https://www.minecraft.net'}{img_tag.img['src']})"
    text = img_url + "\n"
    # Title and subtitle, e.g. 'Minecraft Snapshot 20w21a'.
    head = tags.h1.get_text(strip=True)
    lead = tags.p.get_text(strip=True)
    text += "# " + head + "\n"
    text += "## " + lead + "\n"
    # Decide output directory / file name from the release kind.
    if "snapshot" in head.lower():
        head_name = head[head.rfind(" ") + 1:]
        dirname = "./snapshots/" + head_name
    elif "pre-release" in head.lower():
        head_name = head[head.find(" ") + 1:]
        dirname = "./pre_release/" + head_name
    elif "candidate" in head.lower():
        head_name = head[head.find(" ") + 1:]
        dirname = "./candidate/" + head_name
    # Bug fix: the original condition was
    #     elif "edition" or "released" in head.lower():
    # which is always truthy ("edition" is a non-empty string), so every
    # remaining title fell into this branch and the "update" branch below
    # was unreachable.
    elif "edition" in head.lower() or "released" in head.lower():
        head_name = head
        dirname = "./edition/" + head_name
    elif "update" in head.lower():
        head_name = head[head.find(" ") + 1:]
        dirname = "./edition/" + head_name
    else:
        # Fallback mirrors the old catch-all behavior so dirname is
        # always bound.
        head_name = head
        dirname = "./edition/" + head_name
    filename = head_name
    print(dirname)
    # Convert the article body to Markdown.
    body_html = str()
    for child in body_tag.children:
        body_html += str(child)
    output = Tomd(body_html).markdown
    output = output.replace("<br/>", "")
    # NOTE(review): the original called replace("<", "<") and
    # replace(">", ">"), which are no-ops; presumably the intent was to
    # unescape HTML entities — confirm against real article output.
    output = output.replace("&lt;", "<")
    output = output.replace("&gt;", ">")
    output = output.replace("<li>", "- ")
    output = output.replace("</li>\n", "")
    text += output
    # Author block and publish date.
    author = author_tag.dl.get_text()
    try:
        author_img_url = f"![]({'https://www.minecraft.net'}{author_tag.img['src']})"
    except TypeError:
        # Some articles have no author image.
        author_img_url = f"![]()"
    pubdate = author_tag.find(class_="pubDate").attrs['data-value'][:10]
    text += (author.rstrip("\n") + "\n" + pubdate + "\n" + author_img_url + "\n")
    text = text.replace("Written By", "**Written By**")
    text = text.replace("Published", "**Published**")
    # Create the target directory, then write the document.
    mkdir(dirname)
    writedoc(text, dirname, filename)
def build_file(name):
    """Convert posts/<name>/index.html into Hexo-flavored Markdown.

    Corrected variant: reads with explicit UTF-8 (without it the read
    returned null) and writes with explicit UTF-8 (without it a
    UnicodeEncodeError was raised). Tags extraction is disabled because
    the source pages do not render tags.
    """
    path = "posts/" + name + "/index.html"
    # Added "encoding" parameter, else it would return null.
    # Bug fix: the handle was previously never closed; the context
    # manager guarantees it is.
    with codecs.open(path, "r", encoding="utf-8") as f:
        html = f.read()
    soup = BeautifulSoup(html, features="html5lib")
    # Front matter.
    md = "---"
    md += "\ntitle: " + soup.find("h1").get_text().strip()
    md += "\ndate: " + soup.find("time").get_text().replace("-", "/")
    # This line caused me some trouble, because in my html files
    # the tags were not displayed.
    # md += "\ntags: " + soup.find("a", "tag-link").get_text()
    md += "\n---\n"
    content = soup.find(itemprop="articleBody")
    for child in content.children:
        if str(child)[:4] == "<img":
            # Keep only the file name; Hexo resolves it via asset_img.
            end_point = child["src"].rfind("/") + 1
            file_name = child["src"][end_point:]
            text = "{% asset_img " + file_name + " %}"
            md += "\n" + text + "\n"
        elif str(child)[:2] == "<h":
            # "<h3>…" -> "### …" and so on for other heading levels.
            num_pounds = "#" * int(str(child)[2])
            md += "\n" + num_pounds + " " + child.get_text() + "\n"
        elif str(child)[:3] == "<if":
            # <iframe> elements pass through verbatim.
            md += "\n" + str(child) + "\n"
        elif str(child)[:24] == '<figure class="highlight':
            # Highlighted code figure: recover the language tag and the
            # code text from the table Tomd leaves behind.
            code_sample = str(child)
            code_type = code_sample[25:code_sample.find('"', 24)]
            temp_md = Tomd(str(child)).markdown
            temp_md = temp_md[temp_md.find('<td class="code"'):]
            temp_md = BeautifulSoup(temp_md, features="html5lib").find("pre")
            pre_md = str(temp_md)
            pre_md = pre_md[5:-6]  # strip the "<pre>" / "</pre>" wrapper
            temp_md = "\n``` "
            temp_md += code_type + "\n"
            for i, char in enumerate(pre_md):
                # Insert a newline in front of each "<br/>" marker; the
                # markers themselves are stripped below.
                if pre_md[i:i + 5] == "<br/>":
                    temp_md += "\n"
                temp_md += char
            temp_md += "```"
            md += temp_md.replace("<br/>", "")
        else:
            md += Tomd(str(child)).markdown
    # Added "encoding" parameter, else it would throw a UnicodeEncodeError.
    with open("posts/" + name + ".md", "w", encoding="utf-8") as file:
        file.write(md)