Example 1
    def test_markdown_renderer(self):
        with open("tests/samples/syntax.md", encoding="utf-8") as f:
            text = f.read()

        markdown = marko.Markdown(renderer=MarkdownRenderer)
        rerendered = markdown(text)
        assert normalize_html(marko.convert(rerendered)) == normalize_html(
            marko.convert(text))
Example 2
    def test_markdown_renderer(self):
        from marko.md_renderer import MarkdownRenderer

        with open('tests/samples/syntax.md', encoding='utf-8') as f:
            text = f.read()

        markdown = marko.Markdown(renderer=MarkdownRenderer)
        rerendered = markdown(text)
        assert normalize_html(marko.convert(rerendered)) == normalize_html(
            marko.convert(text))
Example 3
    def parse(self, text):
        context = []
        if self.project == 'vscode':
            text = text[7:]
        html = marko.convert(text)
        soup = BeautifulSoup(html, 'html.parser')

        for child in soup.contents:
            if not isinstance(child, bs4.element.Tag):
                continue
            context.append(child.text.strip())
        return '\n'.join(context)
Example 4
def getPlainMetadataFromArticle(article):
    # Given an `Article` instance,
    # get a dict containing flattened-down metadata for the
    # article: title, contributors etc.
    title = marko.convert(''.join(article.data.get('title', '')))
    abstract = marko.convert(''.join(article.data.get('abstract', '')))
    contributor = marko.convert(''.join(article.data.get('contributor', '')))
    return {
        'pid': article.abstract.pid,
        'contributor': strip_tags(contributor).strip(),
        'title': strip_tags(title).strip(),
        'abstract': strip_tags(abstract).strip(),
        'keywords': article.data.get('keywords', []),
        'data': article.data
    }
Example 5
def render_recipe_page(recipe: Recipe, env: Environment) -> str:
    """Formats an HTML page for a given Recipe, returned as a str

    Args:
        recipe (Recipe): The Recipe object to render
        env (Environment): A Jinja environment

    Returns:
        str: HTML representing the Recipe
    """

    return env.get_template("recipe.html").render(
        recipe=recipe,
        ingredients_html=marko.convert(recipe.ingredients),
        instructions_html=marko.convert(recipe.instructions),
    )
Example 6
def get_code_and_info_from_py(file_path):
    if file_path.endswith('py'):
        # group 1: the module docstring, group 2: the code that follows
        pattern = r"[\'\"]{3}(.*?)['\"]{3}(.*)"
    else:
        # group 1: the tail of a leading /* ... */ comment, group 2: the rest
        pattern = r"/\*\s*.*?(.*)\*/\s*(.*)"
    with open(file_path) as fp:
        f = fp.read()
    matches = re.search(pattern, f, re.MULTILINE | re.DOTALL)

    if matches:
        if len(matches.groups()) >= 2:
            return matches.group(2).strip(), marko.convert(matches.group(1))
        return '', marko.convert(matches.group(0))

    return f, ''
Example 7
def md_to_html(md):
    try:
        html = marko.convert(md)
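        # marko separates block elements with newlines; collapse them away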
        html = html.replace("\n", "")
        return html
    except Exception:
        return ""
Example 8
def get_tag_text(url, tag):
    try:
        resp = requests.get(url)
        if resp.status_code == 200:
            mark = marko.convert(resp.text)
            soup = BeautifulSoup(mark, 'html.parser')
            return soup.find_all(tag)
    except Exception as e:
        print('Exception: %s' % (e))
        return None
Example 9
    def test_tag_surrounding_two_paragraphs_without_newlines(self) -> None:
        text = textwrap.dedent("""\
            <model name="something">first line
            
            second line</model>""")

        document = marko.convert(text)
        # This is actually incorrect HTML and a bug in marko.
        self.assertEqual(
            '<p><model name="something">first line</p>\n<p>second line</model></p>\n',
            document,
        )
Example 10
    def test_tag_surrounding_multiline_paragraph(self) -> None:
        text = textwrap.dedent("""\
            <model name="something">first line
            second line</model>""")

        document = marko.convert(text)
        self.assertEqual(
            '<p><model name="something">'
            "first line\n"
            "second line</model></p>\n",
            document,
        )
Example 11
    def test_that_too_few_newlines_dont_work(self) -> None:
        text = textwrap.dedent("""\
            <model name="something">
            ```
            some code
            ```
            </model>
            """)

        document = marko.convert(text)
        self.assertEqual(
            '<model name="something">\n```\nsome code\n```\n</model>\n',
            document)
Example 12
def collectImg(project, step, lang):
    url = rootURL + project + '/master/' + lang + '/step_' + str(step) + '.md'
    resp = requests.get(url)
    try:
        mark = marko.convert(resp.text)
        soup = BeautifulSoup(mark, 'html.parser')
        for img in soup.find_all("img"):

            imgDownload.imgDownToPath(
                rootPath + project + "/" + lang + "/" + img["src"],
                rootURL + project + "/master/" + lang + "/" + img["src"])
    except Exception as e:
        print(e)
Example 13
    def parse_awesome(self):
        """
        Sync rulesets list from awesome-yara rule
        """
        r = requests.get(AWESOME_PATH)
        soup = BeautifulSoup(marko.convert(r.text), features="html.parser")
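        # in the converted HTML a newline text node follows the first <h2>,
        # so the double nextSibling lands on the element after it (the rulesets list)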
        rulesets_a = soup.h2.nextSibling.nextSibling.find_all("a")
        rulesets = []
        for ruleset in rulesets_a:
            link = ruleset["href"].split("/tree/")[0]
            name = ruleset.contents[0]
            try:
                description = BeautifulSoup(ruleset.nextSibling.li.text,
                                            "html.parser").text
            except AttributeError:
                try:
                    description = BeautifulSoup(
                        ruleset.nextSibling.nextSibling.li.text,
                        "html.parser").text
                except AttributeError:
                    description = None
            if link.startswith("https://github.com/"):
                rulesets.append((link, name, description))

        self.stdout.write(
            self.style.SUCCESS("Found {} repo".format(len(rulesets))))

        with transaction.atomic():
            pool = ThreadPool(THREAD_NO)
            _ = pool.map(self.down_repo, rulesets)
            pool.close()

        self.stdout.write("DONE")

        if len(self.update_repo_list) > 0:
            # DISABLE ALL REPOS NO LONGER LISTED ON AWESOME
            old_rulesets = Ruleset.objects.filter(user__isnull=True).exclude(
                pk__in=self.update_repo_list)
            for ruleset in old_rulesets:
                ruleset.deleted = timezone.now()
                ruleset.disabled = True
                ruleset.save()
                for rule in ruleset.rules.all():
                    rule.deleted = timezone.now()
                    rule.disabled = True
                    rule.save()
            self.stdout.write(self.style.SUCCESS("All repos updated!"))
        else:
            self.stdout.write(
                self.style.ERROR("No ruleset found, check code!"))
Example 14
    def test_too_few_new_lines_when_two_paragraphs(self) -> None:
        text = textwrap.dedent("""\
            <model name="something">
            first line
            
            second line
            </model>
            """)

        document = marko.convert(text)
        # This is actually incorrect HTML and a bug in marko.
        self.assertEqual(
            '<model name="something">\nfirst line\n<p>second line\n</model></p>\n',
            document,
        )
Example 15
    def test_the_necessary_newlines(self) -> None:
        text = textwrap.dedent("""\
            <model name="something">
            
            ```
            some code
            ```
            
            </model>
            """)

        document = marko.convert(text)
        self.assertEqual(
            '<model name="something">\n<pre><code>some code\n</code></pre>\n</model>\n',
            document,
        )
Example 16
def filter_text(lines: str) -> str:
    try:
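        # strip any YAML front matter, keeping only the Markdown body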
        text = frontmatter.loads(lines).content
        lines = text
    except Exception as e:
        print(lines)
        print(e)

    plain_html = marko.convert(lines)

    soup = BeautifulSoup(plain_html, features='html.parser')

    for el in soup.find_all('pre'):
        el.extract()

    return soup.get_text()
Example 17
def home():
    readme = _read_file(app_file_path / "static/home.md")
    css = _read_file(app_file_path / "static/style.css")
    doc = dominate.document(title='PortProxy')

    with doc.head:
        style(css)

    with doc: 
        h1("PortProxy")
        h2("Status")
        h3(a("Reconnect all", href="/reconnect"), " |", a("Stop all", href="/stop"))

        _table = table()
        with _table.add(tbody()):
            with tr():
                for _header in HEADERS:
                    td(b(_header))

        for k, v in ports.items():
            link_to_proxy = f"/{v['machine_name']}/{v['remote_port']}"
            is_active = v['tunnel'].is_active
            status = "Active" if is_active else "Stopped"

            stop_link = "/stop" + link_to_proxy
            delete_link = "/delete" + link_to_proxy
            reconnect_link = "/reconnect" + link_to_proxy
            local_link = v['link']

            with _table.add(tbody()):
                _status = td(b(status))
                _status.set_attribute('class', status.lower())

                td(v['machine_name'])
                td(v['remote_port'])
                
                td(a(link_to_proxy, href=link_to_proxy))
                td(a(local_link, href=local_link))

                td(a(b("stop"), href=stop_link))
                td(a(b("reconnect"), href=reconnect_link))
                td(a(b("delete"), href=delete_link))
        
        h2("What is PortProxy?")
        raw(marko.convert(readme))
    
    return str(doc)
Example 18
def parse_spec(spec):
    markdown = marko.convert(spec)
    soup = BeautifulSoup(markdown, features="html.parser")
    # print(soup)
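    # fenced ```json blocks convert to <code class="language-json"> elements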
    code = [
        s.contents[0] for s in soup.find_all("code", {"class": "language-json"})
        if s.contents
    ]
    ret = []
    for c in code:
        try:
            j = json.loads(c)
            ret.append(j)
        except json.JSONDecodeError:
            pass
    ret = [r for r in ret if "action" in r]
    return ret
Example 19
    def test_sufficient_new_lines_when_two_paragraphs(self) -> None:
        text = textwrap.dedent("""\
            <model name="something">
            
            first line

            second line
            
            </model>
            """)

        document = marko.convert(text)
        self.assertEqual(
            '<model name="something">\n'
            "<p>first line</p>\n"
            "<p>second line</p>\n"
            "</model>\n",
            document,
        )
Example 20
def get_citation(raw_url, article):
    # output
    # logger.info("title marko" + marko.convert(article.data["title"]))
    titleEscape = strip_tags(
        marko.convert(article.data["title"][0])).rstrip()
    authors = []
    """ mainAuthor = {
        "given": article.abstract.contact_firstname,
        "family": article.abstract.contact_lastname
    }
    authors.append(mainAuthor) """
    authorIds = article.abstract.authors.all()
    for contrib in authorIds:
        contributor = get_object_or_404(Author, lastname=contrib)
        contrib = {
            "given": contributor.firstname,
            "family": contributor.lastname
        }
        authors.append(contrib)
    return {
        # DO NOT DISPLAY THE DOI FOR THE MOMENT
        # "DOI": article.doi,
        "URL": "https://journalofdigitalhistory.org/en/article/" +
               article.abstract.pid,
        "type": "article-journal",
        "issue": article.issue.pid,
        "title": titleEscape,
        "author": authors,
        "issued": {
            "year": article.issue.creation_date.strftime("%Y")
        },
        # "volume": "1",
        "container-title": "Journal of Digital History",
        "container-title-short": "JDH"
    }
Example 21
def parse(src):
    with open(src) as f:
        lines = f.readlines()
        content = []
        variables = {}
        for line in lines:
            if not line.startswith("/"):
                content.append(line)
            if line.startswith("///"):
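                # e.g. '/// title: My first post' sets variables['title']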
                _, tag, rest = line.split(" ", 2)
                variables[tag[:-1]] = rest.strip()

        if "author" not in variables:
            variables["author"] = "Edvard Thörnros"

        assert ("title" in variables)
        assert ("first" in variables)
        assert ("last" in variables)

        variables["content"] = marko.convert("".join(content))
        return variables
Example 22
    def parse_awesome(self):
        """
        Sync rulesets list from awesome-yara rule
        """
        r = requests.get(settings.AWESOME_PATH)
        soup = BeautifulSoup(marko.convert(r.text), features="html.parser")
        rulesets_a = soup.h2.nextSibling.nextSibling.find_all("a")
        rulesets = []
        for ruleset in rulesets_a:
            link = ruleset["href"].split("/tree/")[0]
            name = ruleset.contents[0]
            try:
                description = BeautifulSoup(
                    ruleset.nextSibling.li.text, "html.parser"
                ).text
            except AttributeError:
                try:
                    description = BeautifulSoup(
                        ruleset.nextSibling.nextSibling.li.text, "html.parser"
                    ).text
                except AttributeError:
                    description = None
            if link.startswith("https://github.com/"):
                rulesets.append((link, name, description))

        # UPDATE MANUALLY ADDED REPOS
        other_rulesets = Ruleset.objects.filter(
            user__isnull=True, enabled=True
        ).exclude(url__in=[x[0] for x in rulesets])
        for ruleset in other_rulesets:
            rulesets.append((ruleset.url, ruleset.name, ruleset.description))

        self.stdout.write(self.style.SUCCESS("Found {} repo".format(len(rulesets))))

        with transaction.atomic():
            pool = ThreadPool(settings.THREAD_NO)
            _ = pool.map(self.down_repo, rulesets)
            pool.close()

        self.stdout.write("DONE")
Example 23
def read_content(filename):
    """Read content and metadata from file into a dictionary."""
    # Read file content.
    text = fread(filename)

    # Read metadata and save it in a dictionary.
    date_slug = os.path.basename(filename).split('.')[0]
    match = re.search(r'^(?:(\d\d\d\d-\d\d-\d\d)-)?(.+)$', date_slug)
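    # e.g. "2018-01-01-hello-world" -> date "2018-01-01", slug "hello-world"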
    content = {
        'date': match.group(1) or '1970-01-01',
        'slug': match.group(2),
    }

    # Read headers.
    end = 0
    for key, val, end in read_headers(text):
        content[key] = val

    # Separate content from headers.
    text = text[end:]

    # Convert Markdown content to HTML.
    if filename.endswith(('.md', '.mkd', '.mkdn', '.mdown', '.markdown')):
        try:
            if _test == 'ImportError':
                raise ImportError('Error forced by test')
            text = marko.convert(text)
        except ImportError as e:
            log('WARNING: Cannot render Markdown in {}: {}', filename, str(e))

    # Update the dictionary with content and RFC 2822 date.
    content.update({
        'content': text,
        'rfc_2822_date': rfc_2822_format(content['date'])
    })

    return content
Example 24
    def get_description(self):
        return marko.convert(self.description)
Example 25
from marko import convert


def parse_markdown(markdown_raw):
    content = convert(markdown_raw)
    return content
Example 26
def parseJupyterNotebook(notebook):
    cells = notebook.get('cells')
    title = []
    abstract = []
    contributor = []
    disclaimer = []
    paragraphs = []
    collaborators = []
    keywords = []
    references, bibliography, refs = getReferencesFromJupyterNotebook(notebook)

    def formatInlineCitations(m):
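        # replace each <cite data-cite="..."> with its parsed reference text,
        # falling back to the raw citation key when no reference matches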
        parsed_ref = refs.get(m[1], None)
        if parsed_ref is None:
            return m[1]
        return parsed_ref

    num = 0
    for cell in cells:
        # check cell metadata
        tags = cell.get('metadata', {}).get('tags', [])
        source = ''.join(cell.get('source', ''))
        source = re.sub(r'<cite\s+data-cite=.([/\dA-Z]+).>([^<]*)</cite>',
                        formatInlineCitations, source)
        if 'hidden' in tags:
            continue
        if 'title' in tags:
            title.append(marko.convert(source))
        elif 'abstract' in tags:
            abstract.append(marko.convert(source))
        elif 'contributor' in tags:
            contributor.append(marko.convert(source))
        elif 'disclaimer' in tags:
            disclaimer.append(marko.convert(source))
        elif 'collaborators' in tags:
            collaborators.append(marko.convert(source))
        elif 'keywords' in tags:
            keywords.append(marko.convert(source))
        else:
            if cell.get('cell_type') == 'markdown':
                num = num + 1
                paragraphs.append({
                    "num": num,
                    "source": marko.convert(source)
                })
            elif cell.get('cell_type') == 'code':
                num = num + 1
                paragraphs.append({
                    "numCode": num,
                    "code": marko.convert(source)
                })

    return {
        'title': title,
        'title_plain': strip_tags(''.join(title)).strip(),
        'abstract': abstract,
        'abstract_plain': strip_tags(''.join(abstract)).strip(),
        'contributor': contributor,
        'disclaimer': disclaimer,
        'paragraphs': paragraphs,
        'collaborators': collaborators,
        'keywords': keywords,
        'references': references,
        'bibliography': bibliography
    }
Example 27
def _parse_markdown(text: str) -> str:
    html_content = marko.convert(text).rstrip()
    return html_content
Example 28
    def get(self):
        for root, _, files in os.walk(self.summaries_folder):
            for file in files:
                title = None
                targets = None
                terminology = None
                text = None
                summary = []
                rfc = None

                if 'rfc' in file:
                    rfc = re.findall(r"\d+", file)[0]  # e.g. "rfc8446.md" -> "8446"
                    if rfc:
                        filename = os.path.join(root, file)
                        out = parse_file(filename)

                        # TITLE
                        title = marko.convert(out.title)
                        title = re.sub("</?p[^>]*>", "", title)

                        if out[0].text == "Content":
                            # TARGET
                            if out[0][0].text == "Targets":
                                targets = marko.convert(out[0][0].source)
                                targets = re.sub("</?p[^>]*>", "", targets)
                            # TERMINOLOGY
                            if out[0][1].text == "Terminology":
                                terminology = marko.convert(out[0][1].source)
                                terminology = re.sub("</?ul[^>]*>", "",
                                                     terminology)
                                terminology = re.sub("</?a[^>]*>", "",
                                                     terminology)
                                terminology = re.sub(
                                    "<li[^>]*>",
                                    "<span class='badge badge-success'>",
                                    terminology)
                                terminology = re.sub("</li[^>]*>", "</span>",
                                                     terminology)
                            # TEXT
                            if out[0][2].text == "Summary":
                                text = marko.convert(out[0][2].source)
                                text = re.sub("</?p[^>]*>", "", text)

                        if title and targets and terminology and text:
                            summary.append("<div class='card'>")
                            summary.append("<div class='card-header'>")
                            summary.append(title)
                            summary.append("</div>")
                            summary.append("<div class='card-body'>")
                            summary.append("<h5 class='card-title'>Targets: " +
                                           str(targets) + "</h5>")
                            summary.append(
                                "<h6 class='card-subtitle mb-2 text-muted'>Terminology</h6>"
                            )
                            summary.append("<p class='card-text'>")
                            summary.append(terminology)
                            summary.append("<hr/>")
                            summary.append(text)
                            summary.append("</p>")
                            summary.append("</div>")
                            summary.append("</div>")

                if rfc and summary:
                    self.summaries[rfc] = summary

        if self.summaries:
            try:
                with open(self.summaries_filename, "w") as fo:
                    json.dump(self.summaries, fo)
            except Exception:
                return False

            return self.summaries

        return False
Example 29
def helper(text: str, func=unmarkd.unmark) -> None:
    value0 = marko.convert(text)
    unmarked = func(html=value0)
    value1 = marko.convert(unmarked)
    assert value0 == value1, (value0, value1, unmarked)
Example 30
import marko
import requests
from bs4 import BeautifulSoup


def get_tag_text(url, tag):
    try:
        resp = requests.get(url)
        if resp.status_code == 200:
            mark = marko.convert(resp.text)
            soup = BeautifulSoup(mark, 'html.parser')
            return soup.find(tag).text
    except Exception as e:
        print('Exception: %s' % (e))
        return None


def get_tag(url, tag):
    try:
        resp = requests.get(url)
        if resp.status_code == 200:
            mark = marko.convert(resp.text)
            soup = BeautifulSoup(mark, 'html.parser')
            return soup.find(tag)
    except Exception as e:
        print('Exception: %s' % (e))
        return None


url = 'https://raw.githubusercontent.com/raspberrypilearning/interactive-badge/master/en/step_1.md'
resp = requests.get(url)
mark = marko.convert(resp.text)
soup = BeautifulSoup(mark, 'html.parser')

# print(resp.text)
print(get_tag_text(url, 'h2'))
print(get_tag(url, 'iframe'))
print(get_tag(url, 'collapse'))
print(get_tag_text(url, 'li'))