def test_markdown_renderer(self): with open("tests/samples/syntax.md", encoding="utf-8") as f: text = f.read() markdown = marko.Markdown(renderer=MarkdownRenderer) rerendered = markdown(text) assert normalize_html(marko.convert(rerendered)) == normalize_html( marko.convert(text))
def test_markdown_renderer(self):
    from marko.md_renderer import MarkdownRenderer

    with open('tests/samples/syntax.md') as f:
        text = f.read()
    markdown = marko.Markdown(renderer=MarkdownRenderer)
    rerendered = markdown(text)
    assert normalize_html(marko.convert(rerendered)) == normalize_html(
        marko.convert(text)
    )

def parse(self, text):
    context = []
    if self.project == 'vscode':
        text = text[7:]
    html = marko.convert(text)
    # Reuse the converted HTML instead of calling marko.convert a second time.
    soup = BeautifulSoup(html, 'html.parser')
    for child in soup.contents:
        if not isinstance(child, bs4.element.Tag):
            continue
        context.append(child.text.strip())
    return '\n'.join(context)

def getPlainMetadataFromArticle(article):
    # Given an `Article` instance, get a dict containing flattened
    # metadata for the article: title, contributors etc.
    title = marko.convert(''.join(article.data.get('title', '')))
    abstract = marko.convert(''.join(article.data.get('abstract', '')))
    contributor = marko.convert(''.join(article.data.get('contributor', '')))
    return {
        'pid': article.abstract.pid,
        'contributor': strip_tags(contributor).strip(),
        'title': strip_tags(title).strip(),
        'abstract': strip_tags(abstract).strip(),
        'keywords': article.data.get('keywords', []),
        'data': article.data,
    }

def render_recipe_page(recipe: Recipe, env: Environment) -> str:
    """Formats an HTML page for a given Recipe, returned as a str

    Args:
        recipe (Recipe): The Recipe object to render
        env (Environment): A Jinja environment

    Returns:
        str: HTML representing the Recipe
    """
    return env.get_template("recipe.html").render(
        recipe=recipe,
        ingredients_html=marko.convert(recipe.ingredients),
        instructions_html=marko.convert(recipe.instructions),
    )

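# A minimal usage sketch for render_recipe_page. The Recipe dataclass and the
# "templates" directory below are illustrative assumptions; the real Recipe
# type and template layout live elsewhere in the project.
from dataclasses import dataclass
from jinja2 import Environment, FileSystemLoader

@dataclass
class Recipe:  # hypothetical stand-in for the project's Recipe type
    ingredients: str
    instructions: str

env = Environment(loader=FileSystemLoader("templates"))  # assumes templates/recipe.html exists
page = render_recipe_page(
    Recipe(ingredients="- flour\n- water", instructions="Mix **well**."),
    env,
)
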
def get_code_and_info_from_py(file_path):
    # Python files carry their description in a triple-quoted docstring;
    # other files use a /* ... */ block comment.
    if file_path.endswith('.py'):
        pattern = r"[\'\"]{3}(.*?)['\"]{3}(.*)"
    else:
        pattern = r"/\*\s*.*?(.*)\*/\s*(.*)"
    with open(file_path) as fh:
        source = fh.read()
    matches = re.search(pattern, source, re.MULTILINE | re.DOTALL)
    if matches:
        if len(matches.groups()) >= 2:
            return matches.group(2).strip(), marko.convert(matches.group(1))
        return '', marko.convert(matches.group(0))
    return source, ''

def md_to_html(md):
    try:
        html = marko.convert(md)
        html = html.replace("\n", "")
        return html
    except Exception:
        return ""

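# A quick sanity check of md_to_html's behavior: marko.convert ends its output
# with a newline, which the replace() above strips along with any internal ones.
assert md_to_html("# Title") == "<h1>Title</h1>"
assert md_to_html(None) == ""  # any conversion error collapses to an empty string
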
def get_tag_text(url, tag):
    try:
        resp = requests.get(url)
        if resp.status_code == 200:
            mark = marko.convert(resp.text)
            soup = BeautifulSoup(mark, 'html.parser')
            return soup.find_all(tag)
    except Exception as e:
        print('Exception: %s' % (e))
    return None

def test_tag_surrounding_two_paragraphs_without_newlines(self) -> None:
    text = textwrap.dedent("""\
        <model name="something">first line

        second line</model>""")
    document = marko.convert(text)
    # This is actually incorrect HTML and a bug in marko.
    self.assertEqual(
        '<p><model name="something">first line</p>\n<p>second line</model></p>\n',
        document,
    )

def test_tag_surrounding_multiline_paragraph(self) -> None:
    text = textwrap.dedent("""\
        <model name="something">first line
        second line</model>""")
    document = marko.convert(text)
    self.assertEqual(
        '<p><model name="something">'
        "first line\n"
        "second line</model></p>\n",
        document,
    )

def test_that_too_few_newlines_dont_work(self) -> None:
    text = textwrap.dedent("""\
        <model name="something">
        ```
        some code
        ```
        </model>
        """)
    document = marko.convert(text)
    self.assertEqual(
        '<model name="something">\n```\nsome code\n```\n</model>\n', document)

def collectImg(project, step, lang):
    url = rootURL + project + '/master/' + lang + '/step_' + str(step) + '.md'
    resp = requests.get(url)
    try:
        mark = marko.convert(resp.text)
        soup = BeautifulSoup(mark, 'html.parser')
        for img in soup.find_all("img"):
            imgDownload.imgDownToPath(
                rootPath + project + "/" + lang + "/" + img["src"],
                rootURL + project + "/master/" + lang + "/" + img["src"])
    except Exception as e:
        print(e)

def parse_awesome(self): """ Sync rulesets list from awesome-yara rule """ r = requests.get(AWESOME_PATH) soup = BeautifulSoup(marko.convert(r.text), features="html.parser") rulesets_a = soup.h2.nextSibling.nextSibling.find_all("a") rulesets = [] for ruleset in rulesets_a: link = ruleset["href"].split("/tree/")[0] name = ruleset.contents[0] try: description = BeautifulSoup(ruleset.nextSibling.li.text, "html.parser").text except AttributeError: try: description = BeautifulSoup( ruleset.nextSibling.nextSibling.li.text, "html.parser").text except AttributeError: description = None if link.startswith("https://github.com/"): rulesets.append((link, name, description)) self.stdout.write( self.style.SUCCESS("Found {} repo".format(len(rulesets)))) with transaction.atomic(): pool = ThreadPool(THREAD_NO) _ = pool.map(self.down_repo, rulesets) pool.close() self.stdout.write("DONE") if len(self.update_repo_list) > 0: # DISABLE ALL REPO NOT ANYMORE ON AWESOME old_rulesets = Ruleset.objects.filter(user__isnull=True).exclude( pk__in=self.update_repo_list) for ruleset in old_rulesets: ruleset.deleted = timezone.now() ruleset.disabled = True ruleset.save() for rule in ruleset.rules.all(): rule.deleted = timezone.now() rule.disabled = True rule.save() self.stdout.write(self.style.SUCCESS("All repos updated!")) else: self.stdout.write( self.style.ERROR("No ruleset found, check code!"))
def test_too_few_new_lines_when_two_paragraphs(self) -> None:
    text = textwrap.dedent("""\
        <model name="something">
        first line

        second line
        </model>
        """)
    document = marko.convert(text)
    # This is actually incorrect HTML and a bug in marko.
    self.assertEqual(
        '<model name="something">\nfirst line\n<p>second line\n</model></p>\n',
        document,
    )

def test_the_necessary_newlines(self) -> None:
    text = textwrap.dedent("""\
        <model name="something">

        ```
        some code
        ```

        </model>
        """)
    document = marko.convert(text)
    self.assertEqual(
        '<model name="something">\n<pre><code>some code\n</code></pre>\n</model>\n',
        document,
    )

def filter_text(lines: str) -> str:
    try:
        # Drop any YAML front matter, keeping only the Markdown body.
        lines = frontmatter.loads(lines).content
    except Exception as e:
        print(lines)
        print(e)
    plain_html = marko.convert(lines)
    soup = BeautifulSoup(plain_html, features='html.parser')
    # Remove code blocks before extracting the plain text.
    for el in soup.find_all('pre'):
        el.extract()
    return soup.get_text()

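# A small illustration of filter_text's behavior, assuming the
# python-frontmatter package is installed: the YAML front matter is dropped
# by .content, and the fenced code block is removed with its <pre> tag.
doc = """---
title: demo
---
Some *prose*.

```
code that should disappear
```
"""
print(filter_text(doc))  # prints just "Some prose." plus surrounding whitespace
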
def home():
    readme = _read_file(app_file_path / "static/home.md")
    css = _read_file(app_file_path / "static/style.css")
    doc = dominate.document(title='PortProxy')
    with doc.head:
        style(css)
    with doc:
        h1("PortProxy")
        h2("Status")
        h3(a("Reconnect all", href="/reconnect"), " |",
           a("Stop all", href="/stop"))
        _table = table()
        # Header row.
        with _table.add(tbody()):
            with tr():
                for _header in HEADERS:
                    td(b(_header))
        # One row per forwarded port.
        for k, v in ports.items():
            link_to_proxy = f"/{v['machine_name']}/{v['remote_port']}"
            is_active = v['tunnel'].is_active
            status = "Active" if is_active else "Stopped"
            stop_link = "/stop" + link_to_proxy
            delete_link = "/delete" + link_to_proxy
            reconnect_link = "/reconnect" + link_to_proxy
            local_link = v['link']
            with _table.add(tbody()):
                _status = td(b(status))
                _status.set_attribute('class', status.lower())
                td(v['machine_name'])
                td(v['remote_port'])
                td(a(link_to_proxy, href=link_to_proxy))
                td(a(local_link, href=local_link))
                td(a(b("stop"), href=stop_link))
                td(a(b("reconnect"), href=reconnect_link))
                td(a(b("delete"), href=delete_link))
        h2("What is PortProxy?")
        raw(marko.convert(readme))
    return str(doc)

def parse_spec(spec):
    markdown = marko.convert(spec)
    soup = BeautifulSoup(markdown, features="html.parser")
    code = [
        s.contents[0]
        for s in soup.findAll("code", {"class": "language-json"})
        if s.contents
    ]
    ret = []
    for c in code:
        try:
            ret.append(json.loads(c))
        except json.JSONDecodeError:
            pass
    return [r for r in ret if "action" in r]

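# Example input for parse_spec, assuming specs embed JSON actions in fenced
# ```json blocks (the "action" key is what the final filter keeps):
spec = '''
Some prose describing the step.

```json
{"action": "click", "target": "#submit"}
```

```json
{"note": "this object has no action key and is dropped"}
```
'''
print(parse_spec(spec))  # -> [{'action': 'click', 'target': '#submit'}]
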
def test_sufficient_new_lines_when_two_paragraphs(self) -> None:
    text = textwrap.dedent("""\
        <model name="something">

        first line

        second line

        </model>
        """)
    document = marko.convert(text)
    self.assertEqual(
        '<model name="something">\n'
        "<p>first line</p>\n"
        "<p>second line</p>\n"
        "</model>\n",
        document,
    )

def get_citation(raw_url, article):
    # logger.info("title marko" + marko.convert(article.data["title"]))
    titleEscape = strip_tags(marko.convert(article.data["title"][0])).rstrip()
    authors = []
    """
    mainAuthor = {
        "given": article.abstract.contact_firstname,
        "family": article.abstract.contact_lastname
    }
    authors.append(mainAuthor)
    """
    authorIds = article.abstract.authors.all()
    for contrib in authorIds:
        contributor = get_object_or_404(Author, lastname=contrib)
        contrib = {
            "given": contributor.firstname,
            "family": contributor.lastname
        }
        authors.append(contrib)
    return {
        # DO NOT DISPLAY THE DOI FOR THE MOMENT
        # "DOI": article.doi,
        "URL": "https://journalofdigitalhistory.org/en/article/"
               + article.abstract.pid,
        "type": "article-journal",
        "issue": article.issue.pid,
        "title": titleEscape,
        "author": authors,
        "issued": {
            "year": article.issue.creation_date.strftime("%Y")
        },
        # "volume": "1",
        "container-title": "Journal of Digital History",
        "container-title-short": "JDH",
    }

def parse(src):
    with open(src) as f:
        lines = f.readlines()
    content = []
    variables = {}
    for line in lines:
        if not line.startswith("/"):
            content.append(line)
        if line.startswith("///"):
            _, tag, rest = line.split(" ", 2)
            variables[tag[:-1]] = rest.strip()
    if "author" not in variables:
        variables["author"] = "Edvard Thörnros"
    assert "title" in variables
    assert "first" in variables
    assert "last" in variables
    variables["content"] = marko.convert("".join(content))
    return variables

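# The source format parse() expects, reconstructed from the code above: lines
# beginning with "///" declare variables as "/// key: value" (the trailing
# colon on the tag is stripped by tag[:-1]); every other line not starting
# with "/" is Markdown body. A hypothetical input file:
#
#   /// title: My first post
#   /// first: 2021-01-01
#   /// last: 2021-01-02
#   # Hello
#   Some *markdown* content.
#
# parse() would then return {"title": "My first post", "first": "2021-01-01",
# "last": "2021-01-02", "author": "Edvard Thörnros",
# "content": "<h1>Hello</h1>\n<p>Some <em>markdown</em> content.</p>\n"}.
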
def parse_awesome(self): """ Sync rulesets list from awesome-yara rule """ r = requests.get(settings.AWESOME_PATH) soup = BeautifulSoup(marko.convert(r.text), features="html.parser") rulesets_a = soup.h2.nextSibling.nextSibling.find_all("a") rulesets = [] for ruleset in rulesets_a: link = ruleset["href"].split("/tree/")[0] name = ruleset.contents[0] try: description = BeautifulSoup( ruleset.nextSibling.li.text, "html.parser" ).text except AttributeError: try: description = BeautifulSoup( ruleset.nextSibling.nextSibling.li.text, "html.parser" ).text except AttributeError: description = None if link.startswith("https://github.com/"): rulesets.append((link, name, description)) # UPDATE MANUAL ADDED REPO other_rulesets = Ruleset.objects.filter( user__isnull=True, enabled=True ).exclude(url__in=[x[0] for x in rulesets]) for ruleset in other_rulesets: rulesets.append((ruleset.url, ruleset.name, ruleset.description)) self.stdout.write(self.style.SUCCESS("Found {} repo".format(len(rulesets)))) with transaction.atomic(): pool = ThreadPool(settings.THREAD_NO) _ = pool.map(self.down_repo, rulesets) pool.close() self.stdout.write("DONE")
def read_content(filename):
    """Read content and metadata from file into a dictionary."""
    # Read file content.
    text = fread(filename)

    # Read metadata and save it in a dictionary.
    date_slug = os.path.basename(filename).split('.')[0]
    match = re.search(r'^(?:(\d\d\d\d-\d\d-\d\d)-)?(.+)$', date_slug)
    content = {
        'date': match.group(1) or '1970-01-01',
        'slug': match.group(2),
    }

    # Read headers.
    end = 0
    for key, val, end in read_headers(text):
        content[key] = val

    # Separate content from headers.
    text = text[end:]

    # Convert Markdown content to HTML.
    if filename.endswith(('.md', '.mkd', '.mkdn', '.mdown', '.markdown')):
        try:
            if _test == 'ImportError':
                raise ImportError('Error forced by test')
            text = marko.convert(text)
        except ImportError as e:
            log('WARNING: Cannot render Markdown in {}: {}', filename, str(e))

    # Update the dictionary with content and RFC 2822 date.
    content.update({
        'content': text,
        'rfc_2822_date': rfc_2822_format(content['date'])
    })

    return content

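# Worked example for the date/slug regex above, applied to typical post
# filenames:
#   "2018-01-01-hello-world.md" -> date "2018-01-01", slug "hello-world"
#   "about.md"                  -> date "1970-01-01" (fallback), slug "about"
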
def get_description(self):
    return marko.convert(self.description)

def parse_markdown(markdown_raw):
    content = convert(markdown_raw)
    return content

def parseJupyterNotebook(notebook):
    cells = notebook.get('cells')
    title = []
    abstract = []
    contributor = []
    disclaimer = []
    paragraphs = []
    collaborators = []
    keywords = []
    references, bibliography, refs = getReferencesFromJupyterNotebook(notebook)

    def formatInlineCitations(m):
        parsed_ref = refs.get(m[1], None)
        if parsed_ref is None:
            return f'{m[1]}'
        return parsed_ref

    num = 0
    for cell in cells:
        # check cell metadata
        tags = cell.get('metadata', {}).get('tags', [])
        source = ''.join(cell.get('source', ''))
        source = re.sub(r'<cite\s+data-cite=.([/\dA-Z]+).>([^<]*)</cite>',
                        formatInlineCitations, source)
        if 'hidden' in tags:
            continue
        if 'title' in tags:
            title.append(marko.convert(source))
        elif 'abstract' in tags:
            abstract.append(marko.convert(source))
        elif 'contributor' in tags:
            contributor.append(marko.convert(source))
        elif 'disclaimer' in tags:
            disclaimer.append(marko.convert(source))
        elif 'collaborators' in tags:
            collaborators.append(marko.convert(source))
        elif 'keywords' in tags:
            keywords.append(marko.convert(source))
        else:
            if cell.get('cell_type') == 'markdown':
                num = num + 1
                paragraphs.append({
                    "num": num,
                    "source": marko.convert(source)
                })
            elif cell.get('cell_type') == 'code':
                num = num + 1
                paragraphs.append({
                    "numCode": num,
                    "code": marko.convert(source)
                })
    return {
        'title': title,
        'title_plain': strip_tags(''.join(title)).strip(),
        'abstract': abstract,
        'abstract_plain': strip_tags(''.join(abstract)).strip(),
        'contributor': contributor,
        'disclaimer': disclaimer,
        'paragraphs': paragraphs,
        'collaborators': collaborators,
        'keywords': keywords,
        'references': references,
        'bibliography': bibliography
    }

def _parse_markdown(text: str) -> str:
    html_content = marko.convert(text).rstrip()
    return html_content

def get(self):
    for root, _, files in os.walk(self.summaries_folder):
        for file in files:
            title = None
            targets = None
            terminology = None
            text = None
            summary = []
            rfc = None
            if 'rfc' in file:
                rfc = re.findall(r"\d+", file)[0]
            if rfc:
                filename = os.path.join(root, file)
                out = parse_file(filename)
                # TITLE
                title = marko.convert(out.title)
                title = re.sub("</?p[^>]*>", "", title)
                if out[0].text == "Content":
                    # TARGETS
                    if out[0][0].text == "Targets":
                        targets = marko.convert(out[0][0].source)
                        targets = re.sub("</?p[^>]*>", "", targets)
                    # TERMINOLOGY
                    if out[0][1].text == "Terminology":
                        terminology = marko.convert(out[0][1].source)
                        terminology = re.sub("</?ul[^>]*>", "", terminology)
                        terminology = re.sub("</?a[^>]*>", "", terminology)
                        terminology = re.sub(
                            "<li[^>]*>",
                            "<span class='badge badge-success'>",
                            terminology)
                        terminology = re.sub("</li[^>]*>", "</span>",
                                             terminology)
                    # TEXT
                    if out[0][2].text == "Summary":
                        text = marko.convert(out[0][2].source)
                        text = re.sub("</?p[^>]*>", "", text)
                if title and targets and terminology and text:
                    summary.append("<div class='card'>")
                    summary.append("<div class='card-header'>")
                    summary.append(title)
                    summary.append("</div>")
                    summary.append("<div class='card-body'>")
                    summary.append("<h5 class='card-title'>Targets: " +
                                   str(targets) + "</h5>")
                    summary.append(
                        "<h6 class='card-subtitle mb-2 text-muted'>Terminology</h6>"
                    )
                    summary.append("<p class='card-text'>")
                    summary.append(terminology)
                    summary.append("<hr/>")
                    summary.append(text)
                    summary.append("</p>")
                    summary.append("</div>")
                    summary.append("</div>")
            if rfc and summary:
                self.summaries[rfc] = summary
    if self.summaries:
        try:
            with open(self.summaries_filename, "w") as fo:
                json.dump(self.summaries, fo)
        except Exception:
            return False
        return self.summaries
    return False

def helper(text: str, func=unmarkd.unmark) -> None:
    value0 = marko.convert(text)
    unmarked = func(html=value0)
    value1 = marko.convert(unmarked)
    assert value0 == value1, (value0, value1, unmarked)

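# A sketch of how this round-trip helper would be used, assuming the unmarkd
# package is installed: Markdown -> HTML -> Markdown -> HTML must be stable,
# even if the intermediate Markdown differs textually from the input.
helper("# Heading\n\nSome **bold** and *italic* text.")
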
def get_tag_text(url, tag):
    try:
        resp = requests.get(url)
        if resp.status_code == 200:
            mark = marko.convert(resp.text)
            soup = BeautifulSoup(mark, 'html.parser')
            return soup.find(tag).text
    except Exception as e:
        print('Exception: %s' % (e))
    return None


def get_tag(url, tag):
    try:
        resp = requests.get(url)
        if resp.status_code == 200:
            mark = marko.convert(resp.text)
            soup = BeautifulSoup(mark, 'html.parser')
            return soup.find(tag)
    except Exception as e:
        print('Exception: %s' % (e))
    return None


url = 'https://raw.githubusercontent.com/raspberrypilearning/interactive-badge/master/en/step_1.md'
resp = requests.get(url)
mark = marko.convert(resp.text)
soup = BeautifulSoup(mark, 'html.parser')
print(get_tag_text(url, 'h2'))
print(get_tag(url, 'iframe'))
print(get_tag(url, 'collapse'))
print(get_tag_text(url, 'li'))