def test_a_spaces():
    assert md('foo <a href="http://google.com">Google</a> bar'
              ) == 'foo [Google](http://google.com) bar'
    assert md('foo<a href="http://google.com"> Google</a> bar'
              ) == 'foo [Google](http://google.com) bar'
    assert md('foo <a href="http://google.com">Google </a>bar'
              ) == 'foo [Google](http://google.com) bar'
    assert md('foo <a href="http://google.com"></a> bar') == 'foo  bar'
def test_table():
    assert md(
        table
    ) == 'Firstname | Lastname | Age\n--- | --- | ---\nJill | Smith | 50\nEve | Jackson | 94'
    assert md(
        table_head_body
    ) == 'Firstname | Lastname | Age\n--- | --- | ---\nJill | Smith | 50\nEve | Jackson | 94'
    assert md(
        table_missing_text
    ) == 'Firstname | Lastname | Age\n--- | --- | ---\nJill |  | 50\nEve | Jackson | 94'
Example #3
def test_table():
    assert md(
        table
    ) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |'
    assert md(
        table_head_body
    ) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |'
    assert md(
        table_missing_text
    ) == '|  | Lastname | Age |\n| --- | --- | --- |\n| Jill |  | 50 |\n| Eve | Jackson | 94 |'
Example #4
def inline_tests(tag, markup):
    # Basically re-uses test_em() and test_em_spaces().
    assert md(f'<{tag}>Hello</{tag}>') == f'{markup}Hello{markup}'
    assert md(
        f'foo <{tag}>Hello</{tag}> bar') == f'foo {markup}Hello{markup} bar'
    assert md(
        f'foo<{tag}> Hello</{tag}> bar') == f'foo {markup}Hello{markup} bar'
    assert md(
        f'foo <{tag}>Hello </{tag}>bar') == f'foo {markup}Hello{markup} bar'
    assert md(f'foo <{tag}></{tag}> bar') in ['foo  bar',
                                              'foo bar']  # Either is OK
Example #5
def getAnteriorOriginal(notasArticulo):
    textoOriginal = None
    if (notasArticulo):
        textosOriginales = md(notasArticulo).split('**TEXTO ORIGINAL:**')
        if len(textosOriginales) > 1:
            # Extract the link and reference to the original text from the notes
            textosOriginales = textosOriginales[1].split(',')
            linkUltimoTextoOriginal = 'https://www.impo.com.uy'+textosOriginales[0].split('](')[1].split(')')[0]
            # Download the original text and keep only the article body
            data_articulo_original = getJsonFromUrl(linkUltimoTextoOriginal)
            textoOriginal = md(data_articulo_original['textoArticulo'],strip=['a','b'])
    return textoOriginal
Example #6
def buscarRedaccionModificada(tipo, destino):
    textoOriginalMarkdown = None
    textoModificadoMarkdown = None
    
    if ('nueva redaccion' in unidecode(tipo) or 'agrego a' in unidecode(tipo)):
        data_nueva_redaccion = getJsonFromUrl(destino)
        textoModificadoMarkdown = md(data_nueva_redaccion['textoArticulo'],strip=['a','b'])
        textoOriginalMarkdown = getAnteriorOriginal(md(data_nueva_redaccion['notasArticulo']))
    else:
        print(tipo,destino)
    
    return textoOriginalMarkdown, textoModificadoMarkdown
def test_hn_nested_img():
    assert md(
        '<img src="/path/to/img.jpg" alt="Alt text" title="Optional title" />'
    ) == '![Alt text](/path/to/img.jpg "Optional title")'
    assert md('<img src="/path/to/img.jpg" alt="Alt text" />'
              ) == '![Alt text](/path/to/img.jpg)'
    image_attributes_to_markdown = [
        ("", ""),
        ("alt='Alt Text'", "Alt Text"),
        ("alt='Alt Text' title='Optional title'", "Alt Text"),
    ]
    for image_attributes, markdown in image_attributes_to_markdown:
        assert md('<h3>A <img src="/path/to/img.jpg " ' + image_attributes +
                  '/> B</h3>') == '### A ' + markdown + ' B\n\n'
def test_a():
    assert md('<a href="https://google.com">Google</a>'
              ) == '[Google](https://google.com)'
    assert md('<a href="https://google.com">https://google.com</a>',
              autolinks=False) == '[https://google.com](https://google.com)'
    assert md('<a href="https://google.com">https://google.com</a>'
              ) == '<https://google.com>'
    assert md(
        '<a href="https://community.kde.org/Get_Involved">https://community.kde.org/Get_Involved</a>'
    ) == '<https://community.kde.org/Get_Involved>'
    assert md(
        '<a href="https://community.kde.org/Get_Involved">https://community.kde.org/Get_Involved</a>',
        autolinks=False
    ) == '[https://community.kde.org/Get\\_Involved](https://community.kde.org/Get_Involved)'
def get_comments(post, id2name_dict):
    md_comment_section = ''
    html_comment_section = ''

    commentList = post['commentList']
    comments = commentList['comment']
    if not isinstance(comments, list):
        comments = [comments]
    if comments:
        comments.reverse()

        md_comment_section += '\n\n<!-- more -->\n\n---\n'
        html_comment_section += '\n\n<p><!--more--></p>\n\n<hr />\n'

        for j in range(len(comments)):
            comment = comments[j]
            publisherUserId = comment['publisherUserId']
            publisherNick = comment['publisherNick']
            publisherContent = comment['content']
            commentPublishTime = comment['publishTime']
            commentPublishTime = int2time(commentPublishTime)
            replyToUserId = comment['replyToUserId']
            # decodedpublisherUserId = base64.b64decode(publisherUserId)  # still comes out garbled, though...
            # decodedreplyToUserId = base64.b64decode(replyToUserId)  # still comes out garbled, though...
            # publisherContentMD = html2text.html2text(publisherContent).strip('\r\n\t ')
            # publisherContentMD = md(publisherContent).strip('\r\n\t ')
            # publisherContentText = html.unescape(publisherContent)

            replyToStr = ''
            if replyToUserId in id2name_dict:
                Nicks = id2name_dict[replyToUserId]
                Nicks_only = [x[0] for x in Nicks]
                Nicks_only = deduce_list(Nicks_only)
                if len(Nicks_only) >= 2:
                    # print(Nicks)
                    pass
                Nicks.sort(key=lambda x: x[-1])
                Nick = Nicks[-1][0]
                replyToStr = ' 回复【' + md(Nick) + '】'

            md_line = '\n`' + commentPublishTime + '` 【' + md(
                publisherNick) + '】' + replyToStr + ' ' + md(
                    publisherContent) + '\n'
            html_line = '\n<p><code>' + commentPublishTime + '</code> 【' + publisherNick + '】' + replyToStr + ' ' + publisherContent + '</p>\n'

            md_comment_section += md_line
            html_comment_section += html_line
    return md_comment_section, html_comment_section
Example #10
    def clean_card_text(self, text: str) -> (str, str):
        emoji_text = text
        for e, e_code in self.emojis.items():
            emoji_text = emoji_text.replace(f'[{e}]', e_code)

        lines = emoji_text.splitlines()
        body = '\n'.join('' if t.startswith('<errata>') else md(t) for t in lines)
        return (body, md(lines[-1]) if lines[-1].startswith('<errata>') else '')
def process_conversion(file_name):

    cwd = os.getcwd()  # Get the current working directory (cwd)
    files = os.listdir(cwd)  # Get all the files in that directory
    print("Files in %r: %s" % (cwd, files))

    output_file_name = file_name.replace(".html", ".md")  # swap only the extension
    output_file = open(output_file_name, "w+")
    print("Output File Name             : ", output_file_name)

    with open(file_name, "r") as input_file:
        html_str = input_file.read()  # read once; pass a string to every converter

        if converter == "html2markdown":
            md_str = html2markdown.convert(html_str)
            output_file.write(md_str)
        elif converter == "markdownify":
            md_str = md(html_str)
            output_file.write(md_str)
        elif converter == "tomd":
            md_str = tomd.Tomd(html_str).markdown
            output_file.write(md_str)
        else:
            print("Not a valid converter")

    output_file.close()
    return input_file, output_file
Example #12
def make_entry(version, notes):
    """
    Builds a changelog entry from the version and the release notes.
    :param version: The version of the entry.
    :param notes: A list of release notes.
    :return: The changelog entry.
    """
    entry = [" " + "=" * 20 + " Neu in " + version.text + " " + "=" * 20 + "\n\n"]
    for note in notes:
        text = ET.tostring(note, encoding="unicode", method="html")
        text = text.replace("html:", "")
        lines = md(text, convert=['li', 'ol', 'ul'], bullets='*o¤').splitlines()
        for line in lines:
            line = line.rstrip()
            if len(line) == 0:
                continue
            line = line.replace("¤", "#")
            line = line.replace("\t ", " "*6)
            line = line.replace("\t", " "*6)
            count = len(line)-len(line.lstrip()) + 4
            if count == 5:
                count = 4
            wrapped_lines = wrap(line.lstrip(), width=80, initial_indent=" "*count, subsequent_indent=" "*(count + 2))
            for wl in wrapped_lines:
                entry.append(wl + "\n")
    entry.append("\n")
    return entry
Example #13
    def parse(self, response):
        article = response.css("article")[0]
        load = DefaultLoader(Post(), article)
        load.add_value("id", response.meta.get("id"))

        load.add_css("title", "h2::text")

        day = article.css(".dateDay::text").extract_first()
        month = article.css(".dateMonth::text").extract_first()
        year = article.css(".dateYear::text").extract_first()
        date = moment.date(f"{month} {day}, {year}").format("YYYY-MM-DD")
        load.add_value("date", date)

        body = "\n\n".join(article.css("section > p").extract())
        body = md(body).strip()
        load.add_value("body", body)

        rows = article.css(".table tr")
        for row in rows:
            about = row.css("h4::text").extract_first().strip(":")
            if about in {"author", "title", "pages", "publisher", "date"}:
                text = row.xpath(
                    "normalize-space(td[2])").extract_first().strip()
                load.add_value(f"book_{about}", text)

        yield load.load_item()
Example #14
 def __init__(self, aem_hacker_output):
     tool = "AEM Hacker"
     severity = "Info"
     item_regex = re.compile(
         "".join([
             r"^(\[\+\] New Finding!!!)$", r"\s*Name: (?P<name>.*)$",
             r"\s*Url: (?P<url>.*)$",
             r"\s*Description: (?P<description>[\s\S]*?)\n\n"
         ]), re.MULTILINE)
     # Populate items
     self.items = list()
     for item in item_regex.finditer(aem_hacker_output):
         finding = Finding(
             title=item.group("name"),
             url=item.group("url"),
             description=md(item.group("description")),
             tool=tool,
             test=tool,
             severity=severity,
             active=False,
             verified=False,
             dynamic_finding=True,
             numerical_severity=Finding.get_numerical_severity(severity))
         finding.unsaved_endpoints = [
             make_endpoint_from_url(item.group("url"))
         ]
         self.items.append(finding)
Example #15
def retrieve_url(url):
    """Retrieve the markdown version of a site given a URL"""
    content = None
    # TODO: remove cruft at the end of URLs, e.g. site.com/bob.html?u=103&t=7
    if url_is_valid(url):
        try:
            url = tidify_url(url)
            response = requests.get(url)
            if response.status_code != 200:
                print("%d status code for %s" % (response.status_code, url))
            elif re.match("^https?://twitter.com/", response.url):
                print("tried to download a tweet")
            elif response.status_code == 200:
                soup = bs(response.content, features='html.parser')
                body = soup.find('body')
                if body is not None:
                    print('found content for', url)
                    for script in soup(["script", "style", "img"]):
                        script.decompose()
                    content = md(str(body))
        except requests.exceptions.ConnectionError as errc:
            print("Error Connecting:", errc)
        except requests.exceptions.Timeout as errt:
            print("Timeout Error:", errt)
        except requests.exceptions.RequestException as err:
            print("Error:", err)

    return content
Example #16
 def _markdownify(self, src_path, dest_path):
     if self._should_write(dest_path):
         self.logger.info(f"Converting {src_path} to markdown")
         with open(src_path, "r") as f:
             src = f.read()
         with open(dest_path, "w") as f:
             f.writelines(md(src))
Example #17
    def __init__(self, campaign_id, json_data):
        self.campaign_id = campaign_id
        self.created_at = json_data.get('created_at')
        self.created_by = json_data.get('created_by')
        self.entity_id = json_data.get('entity_id')

        missing_entry_message = "<p>This entity doesn't have a description yet.</p>"
        raw_entry = json_data.get('entry_parsed') if json_data.get(
            'entry_parsed') is not None else missing_entry_message
        self.entry = md(raw_entry, strip=['img'])

        self.id = json_data.get('id')
        self.image = f"{STORAGE_PATH}{json_data.get('image')}" if json_data.get(
            'image') is not None else ""
        self.is_private = json_data.get('is_private')
        self.name = json_data.get('name')
        self.tags = json_data.get('tags')
        self.created_at = json_data.get('created_at')
        self.updated_at = json_data.get('updated_at')
        self.kind = json_data.get('type')

        self.files = {}
        if 'entity_files' in json_data:
            for entity in json_data.get('entity_files'):
                if entity.get('visibility') == 'all':
                    self.files[entity.get('name')] = entity.get('path')
        else:
            self.files = None
Example #18
def main():

    # Get every collection (favorites) title
    url = 'https://www.zhihu.com/collections/mine'
    html = getHtmlText(url)  # fetch the HTML of the "my collections" page
    title = getCollectionTitle(html)  # collection titles
    title_links = getCollectionLinks(html)  # collection links; collections with more than 10 items are paginated
    # Check whether each collection holds more than 10 items
    i = 0
    pages = []  # all collection pages
    for links in title_links:  # walk every collection link
        bookmark_detail = getHtmlText(links)  # open the collection page
        bookmark_detail_etree = etree.HTML(bookmark_detail)  # prepare for lxml parsing
        max_page = bookmark_detail_etree.xpath(
            '//div[@class="zm-invite-pager"]/span[last()-1]/a/text()'
        )  # check whether there are more than 10 items (i.e. multiple pages)

        if len(max_page) == 0:
            bookmark_page = links
            pages.append(bookmark_page)
            print(bookmark_page)

        if len(max_page) != 0:
            for i in range(1, int(max_page[0]) + 1):
                bookmark_page = links + '?page=' + str(i)
                pages.append(bookmark_page)
                print(bookmark_page)

    collection_title = []
    collection_link = []
    for i in range(len(pages)):
        pages_link = pages[i]
        pages_content = getHtmlText(pages_link)
        print(getContentTitle(pages_content), getContentLink(pages_content))
        collection_title.append(getContentTitle(pages_content))
        collection_link.append(getContentLink(pages_content))

    all_anwser_links = []
    all_titles = []
    for i in range(len(collection_link)):
        for j in range(len(collection_link[i])):
            all_anwser_links.append(collection_link[i][j])
            all_titles.append(collection_title[i][j])
            # print(all_anwser_links)
            with open("address.txt", 'a') as f:
                f.write(collection_link[i][j] + '\n')

    with open('address.txt', 'r+') as f:
        line = f.read()
    a = line.split('\n')

    for i in range(len(a)):
        time.sleep(random.randint(0, 9))
        every_content = getContentMarkdown(a[i])
        print('the {i} of all {b} competiton'.format(i=i, b=len(a)))
        if ((i / 100) + 1):
            with open('{a}.md'.format(a=int(i / 100)), 'a',
                      encoding='utf-8') as f:
                f.write(md(every_content))
                f.write('\n' * 2)
def migrate_old_posts(app, schema_editor):
    """
    Migrate old blog data (stored in JSON) to the current Django models
    """
    import json
    from markdownify import markdownify as md

    # Get Wiblog models
    Comment = app.get_model('wiblog', 'Comment')
    Post = app.get_model('wiblog', 'Post')
    Tag = app.get_model('wiblog', 'Tag')

    # Keep a cache of tags to prevent hammering the DB
    tags = {}

    # Get the old post data
    old_json_file = join(settings.WEBROOT,
            'src/wiblog/migrations/0007_auto_20190120_1309.json')
    old_posts = json.load(open(old_json_file, 'r'))

    for old_post in old_posts:

        new_post = Post(status='DFT')

        new_post.body = md(old_post['body'])
        new_post.title = old_post['title']
        new_post.date = convert_mysql_date(old_post['date'])
        new_post.slug = slugify(old_post['title'])

        new_post.save()

        migrate_post_comments(Comment, new_post, old_post['comments'])
        migrate_post_tags(Tag, new_post, tags, old_post['categories'])
Example #20
def senMsg(favorite):
    try:
        html = api.GetStatusOembed(favorite.id)['html']
        html = md(html,strip = ['script'])
        print("get one status now, msg: ", html)
        
        data = json.dumps(
            {
                'msgtype': 'markdown',
            
                'markdown':{
                    'content': f'''
<font color="warning">{favorite.user.name}</font>
{html}
                    '''
                },
            
            }
        )

        post(
            WECHAT_URL,
            data=data
        )

    except Exception as e:
        console.log("Error senMsg, err: ", e)
Example #21
def genMarkdown(html, outdir):
    # convert html to markdown and dump
    # TODO: fix links that contain '\_'
    content = md(html)
    output = outdir / 'README.md'
    with output.open('w') as fp:
        fp.write(content)
Example #22
def markdownOfThread(ID=11.3):
    from markdownify import markdownify as md
    RET='\n'
    posts = getPostsFromThread(ID)
    print('getPostFromThread returned', len(posts), 'items')
    print('Posts0==', posts[0])
    title = f"""**{posts[0]['body'][:80]}...**
    
Category:{posts[0]['category']}

{posts[0]['isoformat'][:-15]} | {posts[0]['isoformat'][11:16]}
___
"""
            
    content = [f'\n{title}\n\n']
    for post in posts:
        postBody = md(post['body_html'])#.replace('<p>',RET).replace('</p>',RET)
        content.append(
f"""
![avatar]({avatarImg(post['username'])})  {post['username']}  

{post['isoformat'][:-15]} | {post['isoformat'][11:16]}

{postBody}
___
""")

    markdown = '\n'.join(content)
    return markdown
Example #23
 def __init__(self, data: dict) -> None:
     self.content: str = str(md(str(data.get('announcement', None)))).strip()
     self.created_at: datetime = data.get('time', None)
     self.is_pinned: bool = data.get('pinned', None)
     self.id: int = data.get('unique_id', None)
     self.bot_id: int = data.get('bot_id', None)
     self.author_id: int = data.get('creator_id', None)
Example #24
def markdownOfThread(df, ID=11.3):
    from markdownify import markdownify as md
    RET = '\n'
    posts = getPostsFromThread(df, ID)

    title = f"""**{posts[0]['body'][:80]}...**
    
Category:{str(posts[0]['topics'])}

{str(posts[0]['datetime'])[:-15]} | {str(posts[0]['datetime'])[11:16]} 

___
"""

    content = [f'\n{title}\n\n']
    for post in posts:
        postBody = md(post['body'])  #.replace('<p>',RET).replace('</p>',RET)
        content.append(f"""
![avatar]({avatarImg(post['name'])})  {post['name']}  

{str(post['datetime'])[:-15]} | {str(post['datetime'])[11:16]}


{postBody}
___
""")

    markdown = '\n'.join(content)
    return markdown
Example #25
 async def _send(self, json):
     """/message POST
     This is called by the REST API to send messages
     Messages are handled separately due to them being logged.
     Embeds are not logged.
     Messages are saved to the database and sent out via the webhook.
     The input argument is a json object cast to a dict by the webserver.
     It must contain the user, campaign, contents, and avatar URL.
     The campaign field must match up to a pre-configured campaign.
     It handles where the message should be sent, and which webhook."""
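     # Illustrative payload only; the field names below are inferred from how this
     # method reads `json`, not from a documented schema:
     # {"user": "Alice", "campaign": "my-campaign",
     #  "content": "<p>Hello <b>world</b></p>", "avatar_url": "https://example.com/a.png"}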
     campaign_entry = self.db.search((self.search.doctype == "campaign") & (
         self.search.campaign == json["campaign"]))
     if not campaign_entry:
         return
     else:
         campaign_entry = campaign_entry[0]
     json["content"] = md(json["content"])
     note_data = {
         "doctype": "rpbridge",
         "user": json["user"],
         "campaign": json["campaign"],
         "timestamp": datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
         "contents": json["content"]
     }
     webhook = await self.bot.fetch_webhook(campaign_entry["webhook"])
     message = await webhook.send(json["content"],
                                  wait=True,
                                  username=json["user"],
                                  avatar_url=json["avatar_url"])
     note_data["message"] = message.id
     self.db.insert(note_data)
Example #26
    def run(self, dispatcher: CollectingDispatcher, tracker: Tracker,
            domain: Dict[Text, Any]):
        disease = tracker.get_slot("disease")
        pre_disease = tracker.get_slot("sure")
        print("pre_disease::::" + str(pre_disease))

        possible_diseases = retrieve_disease_name(disease)
        if disease == pre_disease or len(possible_diseases) == 1:
            a = graph.run(
                "match (a:Disease{name: {disease}}) return a.prevent",
                disease=disease).data()[0]
            if 'a.prevent' in a:
                prevent = a['a.prevent']
                template = "以下是有关预防 {0} 的知识:{1}"
                retmsg = template.format(disease,
                                         md(prevent.replace('\n', '<br/>')))
            else:
                retmsg = disease + "暂无常见预防方法"
            dispatcher.utter_message(retmsg)
        elif len(possible_diseases) > 1:
            buttons = []
            for d in possible_diseases:
                buttons.append(
                    make_button(
                        d,
                        '/search_prevention{{"disease":"{0}", "sure":"{1}"}}'.
                        format(d, d)))
            dispatcher.utter_button_message("请点击选择想查询的疾病,若没有想要的,请忽略此消息",
                                            buttons)
        else:
            dispatcher.utter_message("知识库中暂无与 {0} 相关的预防记录".format(disease))
        return []
Example #27
    def create(self, request):
        action_serializer = ActionSerializer(data=request.data)

        if not action_serializer.is_valid():
            return Response(action_serializer.errors,
                            status=status.HTTP_400_BAD_REQUEST)

        partners_involved = action_serializer.validated_data.pop(
            'partners_involved')
        principles = action_serializer.validated_data.pop('principles')
        sustainable_development_goals = action_serializer.validated_data.pop(
            'sustainable_development_goals')
        action_data = Action.objects.create(**action_serializer.validated_data)

        setattr(action_data, 'cooperative_id', request.user.cooperative.id)
        for partner in partners_involved:
            action_data.partners_involved.add(partner['id'])

        for principle in principles:
            action_data.principles.add(principle['id'])

        for goal in sustainable_development_goals:
            action_data.sustainable_development_goals.add(goal['id'])

        action_data.description = md(
            action_data.description) if action_data.description else None
        action_data.save()
        return Response("ACTION_CREATED", status=status.HTTP_200_OK)
def test_chomp():
    assert md(' <b></b> ') == '  '
    assert md(' <b> </b> ') == '  '
    assert md(' <b>  </b> ') == '  '
    assert md(' <b>   </b> ') == '  '
    assert md(' <b>s </b> ') == ' **s**  '
    assert md(' <b> s</b> ') == '  **s** '
    assert md(' <b> s </b> ') == '  **s**  '
    assert md(' <b>  s  </b> ') == '  **s**  '
Example #29
 def __init__(self, zap_result, tool_name):
     zap_json = json.loads(zap_result)
     # Populate items
     self.items = list()
     for site in zap_json["site"]:
         for alert in site["alerts"]:
             description = list()
             if "desc" in alert:
                 description.append(md(alert["desc"]))
             if "solution" in alert:
                 description.append(f'**Solution**:\n {md(alert["solution"])}')
             if "reference" in alert:
                 description.append(f'**Reference**:\n {md(alert["reference"])}')
             if "otherinfo" in alert:
                 description.append(f'**Other information**:\n {md(alert["otherinfo"])}')
             description.append(f'**Confidence**: {md(c.ZAP_CONFIDENCES[alert["confidence"]])}')
             description = "\n".join(description)
             instances = list()
             if alert["instances"]:
                 instances.append("\n")
                 instances.append("| URI | Method | Parameter | Attack | Evidence |")
                 instances.append("| --- | ------ | --------- | ------ | -------- |")
             for item in alert["instances"]:
                 instances.append("| {} |".format(" | ".join([
                     html.escape(md_table_escape(item.get("uri", "-"))),
                     html.escape(md_table_escape(item.get("method", "-"))),
                     html.escape(md_table_escape(item.get("param", "-"))),
                     html.escape(md_table_escape(item.get("attack", "-"))),
                     html.escape(md_table_escape(item.get("evidence", "-")))
                 ])))
             finding = Finding(
                 title=alert["name"],
                 url=site["@name"],
                 description=description,
                 payload="\n".join(instances),
                 tool=tool_name,
                 test=tool_name,
                 severity=c.ZAP_SEVERITIES[alert["riskcode"]],
                 active=False,
                 verified=False,
                 dynamic_finding=True,
                 numerical_severity=Finding.get_numerical_severity(
                     c.ZAP_SEVERITIES[alert["riskcode"]]
                 )
             )
             finding.unsaved_endpoints = list()
             added_endpoints = set()
             for item in alert["instances"]:
                 if not item.get("uri", None):
                     continue
                 endpoint = make_endpoint_from_url(
                     item.get("uri"),
                     include_query=False, include_fragment=False
                 )
                 if str(endpoint) in added_endpoints:
                     continue
                 finding.unsaved_endpoints.append(endpoint)
                 added_endpoints.add(str(endpoint))
             self.items.append(finding)
Example #30
    def get_items(self, tree, test):
        """
        @return items A list of Host instances
        """

        items = list()
        for node in tree.findall('site'):
            site = Site(node)
            # Append the port only when it exists; the parentheses keep site.ip
            # from being discarded when site.port is None
            main_host = Endpoint(host=site.ip +
                                 (site.port if site.port is not None else ""))
            for item in site.items:
                severity = item.riskdesc.split(' ', 1)[0]
                references = ''
                for ref in item.ref:
                    references += ref + "\n"

                find = Finding(
                    title=item.name,
                    tool="ZAP",
                    cwe=item.cwe,
                    description=md(item.desc),
                    test=test,
                    severity=severity,
                    mitigation=md(item.resolution),
                    references=references,
                    active=False,
                    verified=False,
                    false_p=False,
                    duplicate=False,
                    out_of_scope=False,
                    mitigated=None,
                    impact="No impact provided",
                    numerical_severity=Finding.get_numerical_severity(
                        severity))

                find.unsaved_endpoints = [main_host]
                for i in item.items:
                    parts = urlparse(i['uri'])
                    find.unsaved_endpoints.append(
                        Endpoint(protocol=parts.scheme,
                                 host=parts.netloc,
                                 path=parts.path,
                                 query=parts.query,
                                 fragment=parts.fragment))
                items.append(find)
        return items
def test_nested_uls():
    """
    Nested ULs should alternate bullet characters.

    """
    assert md(
        nested_uls
    ) == '\n* 1\n\t+ a\n\t\t- I\n\t\t- II\n\t\t- III\n\t+ b\n\t+ c\n* 2\n* 3\n\n'
def convert_to_markdown(s):
    """Converts an html string to markdown"""

    s = unicode(s).encode('utf-8')
    #s = ''.join([i if ord(i) < 128 else ' ' for i in s])
    #print("Original:\n" + s)
    #print("====")

    s = strip(s, '[if', 'endif]')
    s = strip(s, '<!--', '-->')
    s = s.replace('<div>', "<br /><div>")
    s = s.replace('<tr>', "<br /><tr>")
    s = s.replace('<ul>', "<br /><ul>")
    s = s.replace('<li>', "<br /><li>")
    s = md(s).encode('utf-8')
    s = s.strip('"')
    s = s.strip()

    return s
def test_named_entities():
    assert md('&raquo;') == u'\xbb'
def test_br():
    assert md('a<br />b<br />c') == 'a  \nb  \nc'
def test_hn():
    assert md('<h3>Hello</h3>') == '### Hello\n\n'
    assert md('<h6>Hello</h6>') == '###### Hello\n\n'
def test_nested_blockquote():
    text = md('<blockquote>And she was like <blockquote>Hello</blockquote></blockquote>').strip()
    assert text == '> And she was like \n> > Hello'
def test_img():
    assert md('<img src="/path/to/img.jpg" alt="Alt text" title="Optional title" />') == '![Alt text](/path/to/img.jpg "Optional title")'
    assert md('<img src="/path/to/img.jpg" alt="Alt text" />') == '![Alt text](/path/to/img.jpg)'
def test_underscore():
    assert md('_hey_dude_') == r'\_hey\_dude\_'
def test_nested():
    text = md('<p>This is an <a href="http://example.com/">example link</a>.</p>')
    assert text == 'This is an [example link](http://example.com/).\n\n'
def test_strong():
    assert md('<strong>Hello</strong>') == '**Hello**'
def test_p():
    assert md('<p>hello</p>') == 'hello\n\n'
def test_ol():
    assert md('<ol><li>a</li><li>b</li></ol>') == '1. a\n2. b\n'
def test_i():
    assert md('<i>Hello</i>') == '*Hello*'
def test_atx_closed_headings():
    assert md('<h1>Hello</h1>', heading_style=ATX_CLOSED) == '# Hello #\n\n'
    assert md('<h2>Hello</h2>', heading_style=ATX_CLOSED) == '## Hello ##\n\n'
def test_atx_headings():
    assert md('<h1>Hello</h1>', heading_style=ATX) == '# Hello\n\n'
    assert md('<h2>Hello</h2>', heading_style=ATX) == '## Hello\n\n'
def test_hexadecimal_entities():
    # This looks to be a bug in BeautifulSoup (fixed in bs4) that we have to work around.
    assert md('&#x27;') == '\x27'
def test_single_escaping_entities():
    assert md('&amp;amp;') == '&amp;'
def test_a_with_title():
    text = md('<a href="http://google.com" title="The &quot;Goog&quot;">Google</a>')
    assert text == r'[Google](http://google.com "The \"Goog\"")'
def test_xml_entities():
    assert md('&amp;') == '&'
def test_ul():
    assert md('<ul><li>a</li><li>b</li></ul>') == '* a\n* b\n'
def test_bullets():
    assert md(nested_uls, bullets='-') == '- 1\n\t- a\n\t\t- I\n\t\t- II\n\t\t- III\n\t\t\n\t- b\n\t- c\n\t\n- 2\n- 3\n'
def test_blockquote():
    assert md('<blockquote>Hello</blockquote>').strip() == '> Hello'
def test_nested_uls():
    """
    Nested ULs should alternate bullet characters.

    """
    assert md(nested_uls) == '* 1\n\t+ a\n\t\t- I\n\t\t- II\n\t\t- III\n\t\t\n\t+ b\n\t+ c\n\t\n* 2\n* 3\n'
def test_a():
    assert md('<a href="http://google.com">Google</a>') == '[Google](http://google.com)'
def test_em():
    assert md('<em>Hello</em>') == '*Hello*'
def test_h1():
    assert md('<h1>Hello</h1>') == 'Hello\n=====\n\n'
def test_a_no_autolinks():
    text = md('<a href="http://google.com">http://google.com</a>', autolinks=False)
    assert text == '[http://google.com](http://google.com)'
def test_b():
    assert md('<b>Hello</b>') == '**Hello**'
def test_h2():
    assert md('<h2>Hello</h2>') == 'Hello\n-----\n\n'
def test_a_shortcut():
    text = md('<a href="http://google.com">http://google.com</a>')
    assert text == '<http://google.com>'