def parse(self, inputstring, document):
    """Parse ``inputstring`` and convert the resulting AST for ``document``.

    ``document`` is stored on the instance both as ``self.document`` and as
    the initial ``self.current_node``. The text is parsed with a fresh
    ``Parser`` and the resulting AST is handed to ``self.convert_ast``.
    """
    # The document itself is the starting "current node".
    self.document = self.current_node = document

    self.setup_parse(inputstring, document)
    self.setup_sections()

    # The parser is always fed the input with one extra trailing newline.
    tree = Parser().parse(inputstring + '\n')
    self.convert_ast(tree)

    self.finish_parse()
def parse(self, inputstring, document):
    """Parse ``inputstring`` and convert the resulting AST for ``document``.

    Before parsing, ``self.config`` is reset to a copy of
    ``self.default_config`` and then overlaid with
    ``document.settings.env.config.recommonmark_config`` when that attribute
    chain can be resolved. If it cannot, the defaults are used unchanged.
    """
    # The document itself is the starting "current node".
    self.document = self.current_node = document

    # Work on a copy so the update below does not touch default_config.
    self.config = self.default_config.copy()
    try:
        overrides = self.document.settings.env.config.recommonmark_config
        self.config.update(overrides)
    except AttributeError:
        # Best effort: any link of the attribute chain may be missing.
        pass

    self.setup_parse(inputstring, document)
    self.setup_sections()

    # The parser is always fed the input with one extra trailing newline.
    tree = Parser().parse(inputstring + '\n')
    self.convert_ast(tree)

    self.finish_parse()
from commonmark import Parser
from omnidoc.markdown import markdown_to_tree, _md_ast_children

# Shared fixture: two headings, each followed by one paragraph.
example_md = """
# Heading 1

That is paragraph text.

## Subsection

Also awesome *text*
"""

# Parsed once at import time and reused by every test below.
parser = Parser()
example_md_ast = parser.parse(example_md)


def test_md_ast_get_children():
    """The children of the parsed fixture come back in document order."""
    node_types = [node.t for node in _md_ast_children(example_md_ast)]
    assert node_types == ['heading', 'paragraph', 'heading', 'paragraph']


def test_markdown_to_tree():
    """Smoke test: build the tree and print its pretty form (no assertions yet)."""
    tree = markdown_to_tree(example_md_ast)
    print(tree.pretty())
    # TODO: write test


if __name__ == "__main__":
    test_markdown_to_tree()
def markdown_to_notion(markdown: str) -> list:
    """
    Convert Markdown formatted string to Notion.

    Arguments
    ---------
    markdown : str
        Text to convert.

    Returns
    -------
    list
        The consolidated ``[text]`` / ``[text, formats]`` items after
        they have been passed through ``_cleanup_dashes``.
    """
    # commonmark doesn't support strikethrough, so we handle it ourselves:
    # each pair of "~~" markers becomes inline <s>...</s> HTML. A final
    # unpaired "~~" is left untouched.
    while markdown.count("~~") >= 2:
        markdown = markdown.replace("~~", "<s>", 1)
        markdown = markdown.replace("~~", "</s>", 1)

    # we don't want to touch dashes, so temporarily replace them here
    markdown = markdown.replace("-", "⸻")

    ast = prepare(Parser().parse(markdown))

    # Format tuples (e.g. ("s", ), ("c", )) that apply to upcoming text.
    active_formats = set()

    notion = []

    for section in ast:
        _, ended_format = _extract_text_and_format_from_ast(section)
        if ended_format:
            active_formats.discard(ended_format)

        if section["type"] == "paragraph":
            notion.append(["\n\n"])

        for item in section.get("children", []):
            literal, new_format = _extract_text_and_format_from_ast(item)

            if new_format:
                active_formats.add(new_format)

            if item["type"] == "html_inline" and literal == "</s>":
                # discard() rather than remove(): a closing tag seen while
                # strikethrough is not active must not raise KeyError.
                active_formats.discard(("s", ))
                literal = ""

            if item["type"] == "softbreak":
                literal = "\n"

            if literal:
                if active_formats:
                    notion.append(
                        [literal, [list(f) for f in sorted(active_formats)]])
                else:
                    notion.append([literal])

            # in the ast format, code blocks are meant
            # to be immediately self-closing
            active_formats.discard(("c", ))

    # remove any trailing newlines from automatic closing paragraph markers
    if notion:
        notion[-1][0] = notion[-1][0].rstrip("\n")

    # consolidate any adjacent text blocks with identical styles
    consolidated = []
    for item in notion:
        same_style_as_previous = bool(consolidated) and (
            _get_format(consolidated[-1], as_set=True)
            == _get_format(item, as_set=True))
        if same_style_as_previous:
            consolidated[-1][0] += item[0]
        elif item[0]:
            consolidated.append(item)

    return _cleanup_dashes(consolidated)
def add_jobs(self, *, queryset) -> None:
    """Append one ``job-opportunity`` element to ``self.root`` per job.

    Only jobs in ``queryset`` with ``add_to_euraxess=True`` are exported.
    Each job yields these child sections, in order: ``description``,
    ``additional-information``, ``eu-funding``, ``work-location``,
    ``hiring-org-inst`` and, when a website link or an e-mail address is
    available, ``application-details``.
    """

    def add_text(parent, tag, text):
        """Create a ``tag`` child of ``parent`` whose text is ``text``."""
        etree.SubElement(parent, tag).text = text

    metadata = MetadataListFieldWithEuraxess()
    md_parser = Parser()
    md_renderer = HtmlRenderer()

    jobs = (
        queryset.filter(add_to_euraxess=True)
        .select_related("institution")
        .prefetch_related("links", "institution__links", "project__programme")
    )

    for job in jobs:
        opportunity = etree.SubElement(self.root, "job-opportunity")
        opportunity.set("organisationIDKey", self.organisation_id_key)
        opportunity.set("lastmodifieddate", date_filter(job.updated_at, "c"))
        add_text(opportunity, "job-id", str(job.id))

        # description
        description = etree.SubElement(opportunity, "description")
        add_text(description, "job-title", job.title)
        add_text(
            description,
            "job-description",
            md_renderer.render(md_parser.parse(job.description)),
        )

        # Every topic becomes a "Computer science" research field; a job
        # without topics gets a single "Other" sub-field instead.
        job_topics = self.parse_topics(metadata.to_representation(job.topics))
        for topic in (job_topics if len(job_topics) > 0 else ["Other"]):
            field = etree.SubElement(description, "research-field")
            add_text(field, "main-research-field", "Computer science")
            add_text(field, "sub-research-field", topic)

        researcher_profiles = self.parse_topics(
            metadata.to_representation(job.career_levels))
        if len(researcher_profiles) > 0:
            profiles = researcher_profiles
        else:
            profiles = ["Established Researcher (R3)"]  # TODO: check default
        for profile in profiles:
            add_text(description, "researcher-profile", profile)

        add_text(description, "type-of-contract", "To be defined")
        add_text(description, "job-status", "Negotiable")
        # The deadline is combined with midnight before being formatted.
        deadline_at_midnight = datetime.combine(
            job.deadline, datetime.min.time())
        add_text(
            description,
            "application-deadline",
            date_filter(deadline_at_midnight, "c"),
        )

        # additional-information
        extra_info = etree.SubElement(opportunity, "additional-information")
        add_text(
            extra_info,
            "info-website",
            f"https://www.hipeac.net{job.get_absolute_url()}",
        )

        # eu-funding
        eu_funding = etree.SubElement(opportunity, "eu-funding")
        if job.project and job.project.programme:
            framework_programme = job.project.programme.euraxess_value
        else:
            framework_programme = "No"
        add_text(eu_funding, "framework-programme", framework_programme)

        # work-location
        location = etree.SubElement(opportunity, "work-location")
        add_text(location, "nr-job-positions", str(job.positions))
        add_text(location, "job-organisation-institute", job.institution.name)
        add_text(location, "job-country", job.country.name)
        add_text(location, "job-city", job.location)

        # hiring-org-inst
        # Looked up before the element is created; an institution type
        # missing from this table raises KeyError.
        type_labels = {
            Institution.UNIVERSITY: "Higher Education Institute",
            Institution.LAB: "Research Laboratory",
            Institution.INNOVATION: "Public Research Institution",
            Institution.INDUSTRY: "Large Company",
            Institution.SME: "Small Medium Enterprise, Start-up",
            Institution.OTHER: "Other",
        }
        organisation_type = type_labels[job.institution.type]

        institution = etree.SubElement(opportunity, "hiring-org-inst")
        add_text(institution, "organisation-institute", job.institution.name)
        add_text(institution, "organisation-institute-type", organisation_type)
        add_text(institution, "country", job.institution.country.name)
        if job.institution.location:
            add_text(institution, "city", job.institution.location)
        if job.institution.recruitment_email:
            add_text(institution, "e-mail", job.institution.recruitment_email)
        for link in job.institution.links.all():
            add_text(institution, "website", link.url)

        # application-details
        # When several links have type "website", the last one wins.
        website_urls = [
            link.url for link in job.links.all() if link.type == "website"
        ]
        application_website = website_urls[-1] if website_urls else None

        if application_website:
            application_details = etree.SubElement(
                opportunity, "application-details")
            add_text(application_details, "how-to-apply", "website")
            add_text(
                application_details, "application-website", application_website)
        elif job.email:
            application_details = etree.SubElement(
                opportunity, "application-details")
            add_text(application_details, "how-to-apply", "e-mail")
            add_text(application_details, "application-email", job.email)