def test_a_spaces():
    assert md('foo <a href="http://google.com">Google</a> bar'
              ) == 'foo [Google](http://google.com) bar'
    assert md('foo<a href="http://google.com"> Google</a> bar'
              ) == 'foo [Google](http://google.com) bar'
    assert md('foo <a href="http://google.com">Google </a>bar'
              ) == 'foo [Google](http://google.com) bar'
    assert md('foo <a href="http://google.com"></a> bar') == 'foo  bar'
def test_table():
    assert md(
        table
    ) == 'Firstname | Lastname | Age\n--- | --- | ---\nJill | Smith | 50\nEve | Jackson | 94'
    assert md(
        table_head_body
    ) == 'Firstname | Lastname | Age\n--- | --- | ---\nJill | Smith | 50\nEve | Jackson | 94'
    assert md(
        table_missing_text
    ) == 'Firstname | Lastname | Age\n--- | --- | ---\nJill |  | 50\nEve | Jackson | 94'
Example #3
def test_table():
    assert md(
        table
    ) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |'
    assert md(
        table_head_body
    ) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |'
    assert md(
        table_missing_text
    ) == '|  | Lastname | Age |\n| --- | --- | --- |\n| Jill |  | 50 |\n| Eve | Jackson | 94 |'
Example #4
def inline_tests(tag, markup):
    # Basically re-uses test_em() and test_em_spaces().
    assert md(f'<{tag}>Hello</{tag}>') == f'{markup}Hello{markup}'
    assert md(
        f'foo <{tag}>Hello</{tag}> bar') == f'foo {markup}Hello{markup} bar'
    assert md(
        f'foo<{tag}> Hello</{tag}> bar') == f'foo {markup}Hello{markup} bar'
    assert md(
        f'foo <{tag}>Hello </{tag}>bar') == f'foo {markup}Hello{markup} bar'
    assert md(f'foo <{tag}></{tag}> bar') in ['foo  bar',
                                              'foo bar']  # Either is OK
Example #5
def getAnteriorOriginal(notasArticulo):
    textoOriginal = None
    if (notasArticulo):
        textosOriginales = md(notasArticulo).split('**TEXTO ORIGINAL:**')
        if len(textosOriginales) > 1:
            # Extract the link and reference to the original text from the notes
            textosOriginales = textosOriginales[1].split(',')
            linkUltimoTextoOriginal = 'https://www.impo.com.uy'+textosOriginales[0].split('](')[1].split(')')[0]
            # Download the original text and keep only the article body
            data_articulo_original = getJsonFromUrl(linkUltimoTextoOriginal)
            textoOriginal = md(data_articulo_original['textoArticulo'],strip=['a','b'])
    return textoOriginal
Example #6
def buscarRedaccionModificada(tipo, destino):
    textoOriginalMarkdown = None
    textoModificadoMarkdown = None
    
    if ('nueva redaccion' in unidecode(tipo) or 'agrego a' in unidecode(tipo)):
        data_nueva_redaccion = getJsonFromUrl(destino)
        textoModificadoMarkdown = md(data_nueva_redaccion['textoArticulo'],strip=['a','b'])
        textoOriginalMarkdown = getAnteriorOriginal(md(data_nueva_redaccion['notasArticulo']))
    else:
        print(tipo,destino)
    
    return textoOriginalMarkdown, textoModificadoMarkdown
def test_hn_nested_img():
    assert md(
        '<img src="/path/to/img.jpg" alt="Alt text" title="Optional title" />'
    ) == '![Alt text](/path/to/img.jpg "Optional title")'
    assert md('<img src="/path/to/img.jpg" alt="Alt text" />'
              ) == '![Alt text](/path/to/img.jpg)'
    image_attributes_to_markdown = [
        ("", ""),
        ("alt='Alt Text'", "Alt Text"),
        ("alt='Alt Text' title='Optional title'", "Alt Text"),
    ]
    for image_attributes, markdown in image_attributes_to_markdown:
        assert md('<h3>A <img src="/path/to/img.jpg " ' + image_attributes +
                  '/> B</h3>') == '### A ' + markdown + ' B\n\n'
def test_a():
    assert md('<a href="https://google.com">Google</a>'
              ) == '[Google](https://google.com)'
    assert md('<a href="https://google.com">https://google.com</a>',
              autolinks=False) == '[https://google.com](https://google.com)'
    assert md('<a href="https://google.com">https://google.com</a>'
              ) == '<https://google.com>'
    assert md(
        '<a href="https://community.kde.org/Get_Involved">https://community.kde.org/Get_Involved</a>'
    ) == '<https://community.kde.org/Get_Involved>'
    assert md(
        '<a href="https://community.kde.org/Get_Involved">https://community.kde.org/Get_Involved</a>',
        autolinks=False
    ) == '[https://community.kde.org/Get\\_Involved](https://community.kde.org/Get_Involved)'
def get_comments(post, id2name_dict):
    md_comment_section = ''
    html_comment_section = ''

    commentList = post['commentList']
    comments = commentList['comment']
    if not isinstance(comments, list):
        comments = [comments]
    if comments:
        comments.reverse()

        md_comment_section += '\n\n<!-- more -->\n\n---\n'
        html_comment_section += '\n\n<p><!--more--></p>\n\n<hr />\n'

        for j in range(len(comments)):
            comment = comments[j]
            publisherUserId = comment['publisherUserId']
            publisherNick = comment['publisherNick']
            publisherContent = comment['content']
            commentPublishTime = comment['publishTime']
            commentPublishTime = int2time(commentPublishTime)
            replyToUserId = comment['replyToUserId']
            # decodedpublisherUserId = base64.b64decode(publisherUserId)  # still comes out garbled, though...
            # decodedreplyToUserId = base64.b64decode(replyToUserId)  # still comes out garbled, though...
            # publisherContentMD = html2text.html2text(publisherContent).strip('\r\n\t ')
            # publisherContentMD = md(publisherContent).strip('\r\n\t ')
            # publisherContentText = html.unescape(publisherContent)

            replyToStr = ''
            if replyToUserId in id2name_dict:
                Nicks = id2name_dict[replyToUserId]
                Nicks_only = [x[0] for x in Nicks]
                Nicks_only = deduce_list(Nicks_only)
                if len(Nicks_only) >= 2:
                    # print(Nicks)
                    pass
                Nicks.sort(key=lambda x: x[-1])
                Nick = Nicks[-1][0]
                replyToStr = ' 回复【' + md(Nick) + '】'

            md_line = '\n`' + commentPublishTime + '` 【' + md(
                publisherNick) + '】' + replyToStr + ' ' + md(
                    publisherContent) + '\n'
            html_line = '\n<p><code>' + commentPublishTime + '</code> 【' + publisherNick + '】' + replyToStr + ' ' + publisherContent + '</p>\n'

            md_comment_section += md_line
            html_comment_section += html_line
    return md_comment_section, html_comment_section
Example #10
    def clean_card_text(self, text: str) -> (str, str):
        emoji_text = text
        for e, e_code in self.emojis.items():
            emoji_text = emoji_text.replace(f'[{e}]', e_code)

        lines = emoji_text.splitlines()
        body = '\n'.join('' if t.startswith('<errata>') else md(t) for t in lines)
        return (body, md(lines[-1]) if lines[-1].startswith('<errata>') else '')
def process_conversion(file_name):

    cwd = os.getcwd()  # Get the current working directory (cwd)
    files = os.listdir(cwd)  # Get all the files in that directory
    print("Files in %r: %s" % (cwd, files))

    output_file_name = file_name.replace(".html", ".md")  # swap only the extension
    output_file = open(output_file_name, "w+")
    print("Output File Name             : ", output_file_name)

    with open(file_name, "r") as input_file:
        html_str = input_file.read()  # read once; pass a string to every converter

        if converter == "html2markdown":
            md_str = html2markdown.convert(html_str)
            output_file.write(md_str)
        elif converter == "markdownify":
            md_str = md(html_str)
            output_file.write(md_str)
        elif converter == "tomd":
            md_str = tomd.Tomd(html_str).markdown
            output_file.write(md_str)
        else:
            print("Not a valid converter")

    output_file.close()
    return input_file, output_file
Example #12
def make_entry(version, notes):
    """
    Builds a changelog entry from the version and the release notes.
    :param version: The version of the entry.
    :param notes: A list of release notes.
    :return: The changelog entry.
    """
    entry = [" " + "=" * 20 + " Neu in " + version.text + " " + "=" * 20 + "\n\n"]
    for note in notes:
        text = ET.tostring(note, encoding="unicode", method="html")
        text = text.replace("html:", "")
        lines = md(text, convert=['li', 'ol', 'ul'], bullets='*o¤').splitlines()
        for line in lines:
            line = line.rstrip()
            if len(line) == 0:
                continue
            line = line.replace("¤", "#")
            line = line.replace("\t ", " "*6)
            line = line.replace("\t", " "*6)
            count = len(line)-len(line.lstrip()) + 4
            if count == 5:
                count = 4
            wrapped_lines = wrap(line.lstrip(), width=80, initial_indent=" "*count, subsequent_indent=" "*(count + 2))
            for wl in wrapped_lines:
                entry.append(wl + "\n")
    entry.append("\n")
    return entry
Example #13
    def parse(self, response):
        article = response.css("article")[0]
        load = DefaultLoader(Post(), article)
        load.add_value("id", response.meta.get("id"))

        load.add_css("title", "h2::text")

        day = article.css(".dateDay::text").extract_first()
        month = article.css(".dateMonth::text").extract_first()
        year = article.css(".dateYear::text").extract_first()
        date = moment.date(f"{month} {day}, {year}").format("YYYY-MM-DD")
        load.add_value("date", date)

        body = "\n\n".join(article.css("section > p").extract())
        body = md(body).strip()
        load.add_value("body", body)

        rows = article.css(".table tr")
        for row in rows:
            about = row.css("h4::text").extract_first().strip(":")
            if about in {"author", "title", "pages", "publisher", "date"}:
                text = row.xpath(
                    "normalize-space(td[2])").extract_first().strip()
                load.add_value(f"book_{about}", text)

        yield load.load_item()
Example #14
 def __init__(self, aem_hacker_output):
     tool = "AEM Hacker"
     severity = "Info"
     item_regex = re.compile(
         "".join([
             r"^(\[\+\] New Finding!!!)$", r"\s*Name: (?P<name>.*)$",
             r"\s*Url: (?P<url>.*)$",
             r"\s*Description: (?P<description>[\s\S]*?)\n\n"
         ]), re.MULTILINE)
     # Populate items
     self.items = list()
     for item in item_regex.finditer(aem_hacker_output):
         finding = Finding(
             title=item.group("name"),
             url=item.group("url"),
             description=md(item.group("description")),
             tool=tool,
             test=tool,
             severity=severity,
             active=False,
             verified=False,
             dynamic_finding=True,
             numerical_severity=Finding.get_numerical_severity(severity))
         finding.unsaved_endpoints = [
             make_endpoint_from_url(item.group("url"))
         ]
         self.items.append(finding)
Example #15
def retrieve_url(url):
    """Retrieve the markdown version of a site given a URL"""
    content = None
    # TODO: remove cruft at the end of URLs, e.g. site.com/bob.html?u=103&t=7
    if url_is_valid(url):
        try:
            url = tidify_url(url)
            response = requests.get(url)
            if response.status_code != 200:
                print("%d status code for %s" % (response.status_code, url))
            elif re.match("^https?://twitter.com/", response.url):
                print("tried to download a tweet")
            elif response.status_code == 200:
                soup = bs(response.content, features='html.parser')
                body = soup.find('body')
                if body is not None:
                    print('found content for', url)
                    for script in soup(["script", "style", "img"]):
                        script.decompose()
                    content = md(str(body))
        except requests.exceptions.ConnectionError as errc:
            print("Error Connecting:", errc)
        except requests.exceptions.Timeout as errt:
            print("Timeout Error:", errt)
        except requests.exceptions.RequestException as err:
            print("Error:", err)

    return content
Example #16
 def _markdownify(self, src_path, dest_path):
     if self._should_write(dest_path):
         self.logger.info(f"Converting {src_path} to markdown")
         with open(src_path, "r") as f:
             src = f.read()
         with open(dest_path, "w") as f:
             f.writelines(md(src))
Example #17
    def __init__(self, campaign_id, json_data):
        self.campaign_id = campaign_id
        self.created_at = json_data.get('created_at')
        self.created_by = json_data.get('created_by')
        self.entity_id = json_data.get('entity_id')

        missing_entry_message = "<p>This entity doesn't have a description yet.</p>"
        raw_entry = json_data.get('entry_parsed') if json_data.get(
            'entry_parsed') is not None else missing_entry_message
        self.entry = md(raw_entry, strip=['img'])

        self.id = json_data.get('id')
        self.image = f"{STORAGE_PATH}{json_data.get('image')}" if json_data.get(
            'image') is not None else ""
        self.is_private = json_data.get('is_private')
        self.name = json_data.get('name')
        self.tags = json_data.get('tags')
        self.created_at = json_data.get('created_at')
        self.updated_at = json_data.get('updated_at')
        self.kind = json_data.get('type')

        self.files = {}
        if 'entity_files' in json_data:
            for entity in json_data.get('entity_files'):
                if entity.get('visibility') == 'all':
                    self.files[entity.get('name')] = entity.get('path')
        else:
            self.files = None
Example #18
def main():

    # Get every collection (favorites) title
    url = 'https://www.zhihu.com/collections/mine'
    html = getHtmlText(url)  # fetch the HTML of the "my collections" page
    title = getCollectionTitle(html)  # collection titles
    title_links = getCollectionLinks(html)  # collection links; collections with more than 10 items are paginated
    # Check whether each collection holds more than 10 items
    i = 0
    pages = []  # all collection pages
    for links in title_links:  # walk every collection link
        bookmark_detail = getHtmlText(links)  # open the collection page
        bookmark_detail_etree = etree.HTML(bookmark_detail)  # prepare for lxml parsing
        max_page = bookmark_detail_etree.xpath(
            '//div[@class="zm-invite-pager"]/span[last()-1]/a/text()'
        )  # check whether there are more than 10 items (i.e. multiple pages)

        if len(max_page) == 0:
            bookmark_page = links
            pages.append(bookmark_page)
            print(bookmark_page)

        if len(max_page) != 0:
            for i in range(1, int(max_page[0]) + 1):
                bookmark_page = links + '?page=' + str(i)
                pages.append(bookmark_page)
                print(bookmark_page)

    collection_title = []
    collection_link = []
    for i in range(len(pages)):
        pages_link = pages[i]
        pages_content = getHtmlText(pages_link)
        print(getContentTitle(pages_content), getContentLink(pages_content))
        collection_title.append(getContentTitle(pages_content))
        collection_link.append(getContentLink(pages_content))

    all_anwser_links = []
    all_titles = []
    for i in range(len(collection_link)):
        for j in range(len(collection_link[i])):
            all_anwser_links.append(collection_link[i][j])
            all_titles.append(collection_title[i][j])
            # print(all_anwser_links)
            with open("address.txt", 'a') as f:
                f.write(collection_link[i][j] + '\n')

    with open('address.txt', 'r+') as f:
        line = f.read()
    a = line.split('\n')

    for i in range(len(a)):
        time.sleep(random.randint(0, 9))
        every_content = getContentMarkdown(a[i])
        print('the {i} of all {b} competiton'.format(i=i, b=len(a)))
        if ((i / 100) + 1):
            with open('{a}.md'.format(a=int(i / 100)), 'a',
                      encoding='utf-8') as f:
                f.write(md(every_content))
                f.write('\n' * 2)
def migrate_old_posts(app, schema_editor):
    """
    Migrate old blog data (stored in JSON) to the current Django models
    """
    import json
    from markdownify import markdownify as md

    # Get Wiblog models
    Comment = app.get_model('wiblog', 'Comment')
    Post = app.get_model('wiblog', 'Post')
    Tag = app.get_model('wiblog', 'Tag')

    # Keep a cache of tags to prevent hammering the DB
    tags = {}

    # Get the old post data
    old_json_file = join(settings.WEBROOT,
            'src/wiblog/migrations/0007_auto_20190120_1309.json')
    old_posts = json.load(open(old_json_file, 'r'))

    for old_post in old_posts:

        new_post = Post(status='DFT')

        new_post.body = md(old_post['body'])
        new_post.title = old_post['title']
        new_post.date = convert_mysql_date(old_post['date'])
        new_post.slug = slugify(old_post['title'])

        new_post.save()

        migrate_post_comments(Comment, new_post, old_post['comments'])
        migrate_post_tags(Tag, new_post, tags, old_post['categories'])
Example #20
def senMsg(favorite):
    try:
        html = api.GetStatusOembed(favorite.id)['html']
        html = md(html,strip = ['script'])
        print("get one status now, msg: ", html)
        
        data = json.dumps(
            {
                'msgtype': 'markdown',
            
                'markdown':{
                    'content': f'''
<font color="warning">{favorite.user.name}</font>
{html}
                    '''
                },
            
            }
        )

        post(
            WECHAT_URL,
            data=data
        )

    except Exception as e:
        console.log("Error senMsg, err: ", e)
Example #21
def genMarkdown(html, outdir):
    # convert html to markdown and dump
    # TODO: fix links that contain '\_'
    content = md(html)
    output = outdir / 'README.md'
    with output.open('w') as fp:
        fp.write(content)
Example #22
def markdownOfThread(ID=11.3):
    from markdownify import markdownify as md
    RET='\n'
    posts = getPostsFromThread(ID)
    print('getPostFromThread returned', len(posts), 'items')
    print('Posts0==', posts[0])
    title = f"""**{posts[0]['body'][:80]}...**
    
Category:{posts[0]['category']}

{posts[0]['isoformat'][:-15]} | {posts[0]['isoformat'][11:16]}
___
"""
            
    content = [f'\n{title}\n\n']
    for post in posts:
        postBody = md(post['body_html'])#.replace('<p>',RET).replace('</p>',RET)
        content.append(
f"""
![avatar]({avatarImg(post['username'])})  {post['username']}  

{post['isoformat'][:-15]} | {post['isoformat'][11:16]}

{postBody}
___
""")

    markdown = '\n'.join(content)
    return markdown
Example #23
 def __init__(self, data: dict) -> None:
     self.content: str = str(md(str(data.get('announcement', None)))).strip()
     self.created_at: datetime = data.get('time', None)
     self.is_pinned: bool = data.get('pinned', None)
     self.id: int = data.get('unique_id', None)
     self.bot_id: int = data.get('bot_id', None)
     self.author_id: int = data.get('creator_id', None)
Example #24
def markdownOfThread(df, ID=11.3):
    from markdownify import markdownify as md
    RET = '\n'
    posts = getPostsFromThread(df, ID)

    title = f"""**{posts[0]['body'][:80]}...**
    
Category:{str(posts[0]['topics'])}

{str(posts[0]['datetime'])[:-15]} | {str(posts[0]['datetime'])[11:16]} 

___
"""

    content = [f'\n{title}\n\n']
    for post in posts:
        postBody = md(post['body'])  #.replace('<p>',RET).replace('</p>',RET)
        content.append(f"""
![avatar]({avatarImg(post['name'])})  {post['name']}  

{str(post['datetime'])[:-15]} | {str(post['datetime'])[11:16]}


{postBody}
___
""")

    markdown = '\n'.join(content)
    return markdown
Example #25
 async def _send(self, json):
     """/message POST
     This is called by the REST API to send messages
     Messages are handled separately due to them being logged.
     Embeds are not logged.
     Messages are saved to the database and sent out via the webhook.
     The input argument is a json object cast to a dict by the webserver.
     It must contain the user, campaign, contents, and avatar URL.
     The campaign field must match up to a pre-configured campaign.
     It handles where the message should be sent, and which webhook."""
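     # Illustrative payload only; the field names below are inferred from how this
     # method reads `json`, not from a documented schema:
     # {"user": "Alice", "campaign": "my-campaign",
     #  "content": "<p>Hello <b>world</b></p>", "avatar_url": "https://example.com/a.png"}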
     campaign_entry = self.db.search((self.search.doctype == "campaign") & (
         self.search.campaign == json["campaign"]))
     if not campaign_entry:
         return
     else:
         campaign_entry = campaign_entry[0]
     json["content"] = md(json["content"])
     note_data = {
         "doctype": "rpbridge",
         "user": json["user"],
         "campaign": json["campaign"],
         "timestamp": datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
         "contents": json["content"]
     }
     webhook = await self.bot.fetch_webhook(campaign_entry["webhook"])
     message = await webhook.send(json["content"],
                                  wait=True,
                                  username=json["user"],
                                  avatar_url=json["avatar_url"])
     note_data["message"] = message.id
     self.db.insert(note_data)
Example #26
    def run(self, dispatcher: CollectingDispatcher, tracker: Tracker,
            domain: Dict[Text, Any]):
        disease = tracker.get_slot("disease")
        pre_disease = tracker.get_slot("sure")
        print("pre_disease::::" + str(pre_disease))

        possible_diseases = retrieve_disease_name(disease)
        if disease == pre_disease or len(possible_diseases) == 1:
            a = graph.run(
                "match (a:Disease{name: {disease}}) return a.prevent",
                disease=disease).data()[0]
            if 'a.prevent' in a:
                prevent = a['a.prevent']
                template = "以下是有关预防 {0} 的知识:{1}"
                retmsg = template.format(disease,
                                         md(prevent.replace('\n', '<br/>')))
            else:
                retmsg = disease + "暂无常见预防方法"
            dispatcher.utter_message(retmsg)
        elif len(possible_diseases) > 1:
            buttons = []
            for d in possible_diseases:
                buttons.append(
                    make_button(
                        d,
                        '/search_prevention{{"disease":"{0}", "sure":"{1}"}}'.
                        format(d, d)))
            dispatcher.utter_button_message("请点击选择想查询的疾病,若没有想要的,请忽略此消息",
                                            buttons)
        else:
            dispatcher.utter_message("知识库中暂无与 {0} 相关的预防记录".format(disease))
        return []
Example #27
    def create(self, request):
        action_serializer = ActionSerializer(data=request.data)

        if not action_serializer.is_valid():
            return Response(action_serializer.errors,
                            status=status.HTTP_400_BAD_REQUEST)

        partners_involved = action_serializer.validated_data.pop(
            'partners_involved')
        principles = action_serializer.validated_data.pop('principles')
        sustainable_development_goals = action_serializer.validated_data.pop(
            'sustainable_development_goals')
        action_data = Action.objects.create(**action_serializer.validated_data)

        setattr(action_data, 'cooperative_id', request.user.cooperative.id)
        for partner in partners_involved:
            action_data.partners_involved.add(partner['id'])

        for principle in principles:
            action_data.principles.add(principle['id'])

        for goal in sustainable_development_goals:
            action_data.sustainable_development_goals.add(goal['id'])

        action_data.description = md(
            action_data.description) if action_data.description else None
        action_data.save()
        return Response("ACTION_CREATED", status=status.HTTP_200_OK)
def test_chomp():
    assert md(' <b></b> ') == '  '
    assert md(' <b> </b> ') == '  '
    assert md(' <b>  </b> ') == '  '
    assert md(' <b>   </b> ') == '  '
    assert md(' <b>s </b> ') == ' **s**  '
    assert md(' <b> s</b> ') == '  **s** '
    assert md(' <b> s </b> ') == '  **s**  '
    assert md(' <b>  s  </b> ') == '  **s**  '
Example #29
 def __init__(self, zap_result, tool_name):
     zap_json = json.loads(zap_result)
     # Populate items
     self.items = list()
     for site in zap_json["site"]:
         for alert in site["alerts"]:
             description = list()
             if "desc" in alert:
                 description.append(md(alert["desc"]))
             if "solution" in alert:
                 description.append(f'**Solution**:\n {md(alert["solution"])}')
             if "reference" in alert:
                 description.append(f'**Reference**:\n {md(alert["reference"])}')
             if "otherinfo" in alert:
                 description.append(f'**Other information**:\n {md(alert["otherinfo"])}')
             description.append(f'**Confidence**: {md(c.ZAP_CONFIDENCES[alert["confidence"]])}')
             description = "\n".join(description)
             instances = list()
             if alert["instances"]:
                 instances.append("\n")
                 instances.append("| URI | Method | Parameter | Attack | Evidence |")
                 instances.append("| --- | ------ | --------- | ------ | -------- |")
             for item in alert["instances"]:
                 instances.append("| {} |".format(" | ".join([
                     html.escape(md_table_escape(item.get("uri", "-"))),
                     html.escape(md_table_escape(item.get("method", "-"))),
                     html.escape(md_table_escape(item.get("param", "-"))),
                     html.escape(md_table_escape(item.get("attack", "-"))),
                     html.escape(md_table_escape(item.get("evidence", "-")))
                 ])))
             finding = Finding(
                 title=alert["name"],
                 url=site["@name"],
                 description=description,
                 payload="\n".join(instances),
                 tool=tool_name,
                 test=tool_name,
                 severity=c.ZAP_SEVERITIES[alert["riskcode"]],
                 active=False,
                 verified=False,
                 dynamic_finding=True,
                 numerical_severity=Finding.get_numerical_severity(
                     c.ZAP_SEVERITIES[alert["riskcode"]]
                 )
             )
             finding.unsaved_endpoints = list()
             added_endpoints = set()
             for item in alert["instances"]:
                 if not item.get("uri", None):
                     continue
                 endpoint = make_endpoint_from_url(
                     item.get("uri"),
                     include_query=False, include_fragment=False
                 )
                 if str(endpoint) in added_endpoints:
                     continue
                 finding.unsaved_endpoints.append(endpoint)
                 added_endpoints.add(str(endpoint))
             self.items.append(finding)
Example #30
    def get_items(self, tree, test):
        """
        @return items A list of Host instances
        """

        items = list()
        for node in tree.findall('site'):
            site = Site(node)
            # Append the port only when it exists; the parentheses keep site.ip
            # from being discarded when site.port is None
            main_host = Endpoint(host=site.ip +
                                 (site.port if site.port is not None else ""))
            for item in site.items:
                severity = item.riskdesc.split(' ', 1)[0]
                references = ''
                for ref in item.ref:
                    references += ref + "\n"

                find = Finding(
                    title=item.name,
                    tool="ZAP",
                    cwe=item.cwe,
                    description=md(item.desc),
                    test=test,
                    severity=severity,
                    mitigation=md(item.resolution),
                    references=references,
                    active=False,
                    verified=False,
                    false_p=False,
                    duplicate=False,
                    out_of_scope=False,
                    mitigated=None,
                    impact="No impact provided",
                    numerical_severity=Finding.get_numerical_severity(
                        severity))

                find.unsaved_endpoints = [main_host]
                for i in item.items:
                    parts = urlparse(i['uri'])
                    find.unsaved_endpoints.append(
                        Endpoint(protocol=parts.scheme,
                                 host=parts.netloc,
                                 path=parts.path,
                                 query=parts.query,
                                 fragment=parts.fragment))
                items.append(find)
        return items
def test_nested_uls():
    """
    Nested ULs should alternate bullet characters.

    """
    assert md(
        nested_uls
    ) == '\n* 1\n\t+ a\n\t\t- I\n\t\t- II\n\t\t- III\n\t+ b\n\t+ c\n* 2\n* 3\n\n'
def convert_to_markdown(s):
    """Converts an html string to markdown"""

    s = unicode(s).encode('utf-8')
    #s = ''.join([i if ord(i) < 128 else ' ' for i in s])
    #print("Original:\n" + s)
    #print("====")

    s = strip(s, '[if', 'endif]')
    s = strip(s, '<!--', '-->')
    s = s.replace('<div>', "<br /><div>")
    s = s.replace('<tr>', "<br /><tr>")
    s = s.replace('<ul>', "<br /><ul>")
    s = s.replace('<li>', "<br /><li>")
    s = md(s).encode('utf-8')
    s = s.strip('"')
    s = s.strip()

    return s
def test_named_entities():
    assert md('&raquo;') == u'\xbb'
def test_br():
    assert md('a<br />b<br />c') == 'a  \nb  \nc'
def test_hn():
    assert md('<h3>Hello</h3>') == '### Hello\n\n'
    assert md('<h6>Hello</h6>') == '###### Hello\n\n'
def test_nested_blockquote():
    text = md('<blockquote>And she was like <blockquote>Hello</blockquote></blockquote>').strip()
    assert text == '> And she was like \n> > Hello'
def test_img():
    assert md('<img src="/path/to/img.jpg" alt="Alt text" title="Optional title" />') == '![Alt text](/path/to/img.jpg "Optional title")'
    assert md('<img src="/path/to/img.jpg" alt="Alt text" />') == '![Alt text](/path/to/img.jpg)'
def test_underscore():
    assert md('_hey_dude_') == r'\_hey\_dude\_'
def test_nested():
    text = md('<p>This is an <a href="http://example.com/">example link</a>.</p>')
    assert text == 'This is an [example link](http://example.com/).\n\n'
def test_strong():
    assert md('<strong>Hello</strong>') == '**Hello**'
def test_p():
    assert md('<p>hello</p>') == 'hello\n\n'
def test_ol():
    assert md('<ol><li>a</li><li>b</li></ol>') == '1. a\n2. b\n'
def test_i():
    assert md('<i>Hello</i>') == '*Hello*'
def test_atx_closed_headings():
    assert md('<h1>Hello</h1>', heading_style=ATX_CLOSED) == '# Hello #\n\n'
    assert md('<h2>Hello</h2>', heading_style=ATX_CLOSED) == '## Hello ##\n\n'
def test_atx_headings():
    assert md('<h1>Hello</h1>', heading_style=ATX) == '# Hello\n\n'
    assert md('<h2>Hello</h2>', heading_style=ATX) == '## Hello\n\n'
def test_hexadecimal_entities():
    # This looks to be a bug in BeautifulSoup (fixed in bs4) that we have to work around.
    assert md('&#x27;') == '\x27'
def test_single_escaping_entities():
    assert md('&amp;amp;') == '&amp;'
def test_a_with_title():
    text = md('<a href="http://google.com" title="The &quot;Goog&quot;">Google</a>')
    assert text == r'[Google](http://google.com "The \"Goog\"")'
def test_xml_entities():
    assert md('&amp;') == '&'
def test_ul():
    assert md('<ul><li>a</li><li>b</li></ul>') == '* a\n* b\n'
def test_bullets():
    assert md(nested_uls, bullets='-') == '- 1\n\t- a\n\t\t- I\n\t\t- II\n\t\t- III\n\t\t\n\t- b\n\t- c\n\t\n- 2\n- 3\n'
def test_blockquote():
    assert md('<blockquote>Hello</blockquote>').strip() == '> Hello'
def test_nested_uls():
    """
    Nested ULs should alternate bullet characters.

    """
    assert md(nested_uls) == '* 1\n\t+ a\n\t\t- I\n\t\t- II\n\t\t- III\n\t\t\n\t+ b\n\t+ c\n\t\n* 2\n* 3\n'
def test_a():
    assert md('<a href="http://google.com">Google</a>') == '[Google](http://google.com)'
def test_em():
    assert md('<em>Hello</em>') == '*Hello*'
def test_h1():
    assert md('<h1>Hello</h1>') == 'Hello\n=====\n\n'
def test_a_no_autolinks():
    text = md('<a href="http://google.com">http://google.com</a>', autolinks=False)
    assert text == '[http://google.com](http://google.com)'
def test_b():
    assert md('<b>Hello</b>') == '**Hello**'
def test_h2():
    assert md('<h2>Hello</h2>') == 'Hello\n-----\n\n'
def test_a_shortcut():
    text = md('<a href="http://google.com">http://google.com</a>')
    assert text == '<http://google.com>'