コード例 #1
0
 def parse(self, inputstring, document):
     """Parse ``inputstring`` and convert the resulting AST for ``document``.

     ``document`` is stored as both ``self.document`` and
     ``self.current_node``. The text is then parsed by ``Parser`` (with a
     trailing newline appended) and the resulting AST is passed to
     ``self.convert_ast``. ``setup_parse``/``setup_sections`` run before
     parsing and ``finish_parse`` runs last.
     """
     self.document = self.current_node = document
     self.setup_parse(inputstring, document)
     self.setup_sections()
     # The parser is fed the input with one extra newline appended.
     tree = Parser().parse(inputstring + '\n')
     self.convert_ast(tree)
     self.finish_parse()
コード例 #2
0
 def parse(self, inputstring, document):
     """Parse ``inputstring`` and convert the resulting AST for ``document``.

     Before parsing, ``self.config`` is rebuilt from a copy of
     ``self.default_config`` and then overlaid with
     ``document.settings.env.config.recommonmark_config`` when that
     attribute chain exists. The text is parsed by ``Parser`` (with a
     trailing newline appended) and the AST is passed to
     ``self.convert_ast``.
     """
     self.document = self.current_node = document

     # Start from the defaults on every call, then apply overrides.
     self.config = self.default_config.copy()
     try:
         self.config.update(
             self.document.settings.env.config.recommonmark_config)
     except AttributeError:
         # Part of the settings/env/config chain is missing: keep defaults.
         pass

     self.setup_parse(inputstring, document)
     self.setup_sections()
     # The parser is fed the input with one extra newline appended.
     tree = Parser().parse(inputstring + '\n')
     self.convert_ast(tree)
     self.finish_parse()
コード例 #3
0
from commonmark import Parser
from omnidoc.markdown import markdown_to_tree, _md_ast_children

# Markdown fixture: a level-1 heading and a level-2 heading, each followed
# by a single paragraph.
example_md = """
# Heading 1

That is paragraph text.


## Subsection

Also awesome *text*
"""
# The fixture is parsed once at import time; the tests in this module read
# the resulting AST from ``example_md_ast``.
parser = Parser()
example_md_ast = parser.parse(example_md)


def test_md_ast_get_children():
    """Check the node types ``_md_ast_children`` yields for the fixture AST."""
    expected_types = ['heading', 'paragraph', 'heading', 'paragraph']
    actual_types = [node.t for node in _md_ast_children(example_md_ast)]
    assert actual_types == expected_types


def test_markdown_to_tree():
    """Smoke test: build a tree from the fixture AST and print its pretty form."""
    rendered = markdown_to_tree(example_md_ast).pretty()
    print(rendered)
    # TODO: write test


# When executed directly as a script, run only the tree smoke test.
if __name__ == "__main__":
    test_markdown_to_tree()
コード例 #4
0
def markdown_to_notion(markdown: str) -> list:
    """
    Convert Markdown formatted string to Notion.

    The text is parsed with ``Parser``; every section of the prepared AST
    is walked and its children are emitted as ``[text]`` or
    ``[text, formats]`` entries. Adjacent entries with identical formats
    are merged before the result is passed through ``_cleanup_dashes``.


    Arguments
    ---------
    markdown : str
        Text to convert.


    Returns
    -------
    list
        Converted entries, as returned by ``_cleanup_dashes``.


    Notes
    -----
    A closing ``</s>`` tag that arrives while no strikethrough is active
    (for example a literal ``</s>`` typed in the input) is dropped
    silently instead of raising ``KeyError``.
    """

    # commonmark doesn't support strikethrough, so each pair of "~~"
    # markers is rewritten into inline <s>...</s> HTML before parsing
    while markdown.count("~~") >= 2:
        markdown = markdown.replace("~~", "<s>", 1)
        markdown = markdown.replace("~~", "</s>", 1)

    # we don't want to touch dashes, so temporarily replace them here
    markdown = markdown.replace("-", "⸻")

    parser = Parser()
    ast = prepare(parser.parse(markdown))

    # formats currently in effect, stored as tuples such as ("s", )
    active_formats = set()

    notion = []

    for section in ast:

        _, ended_format = _extract_text_and_format_from_ast(section)
        if ended_format:
            active_formats.discard(ended_format)

        if section["type"] == "paragraph":
            notion.append(["\n\n"])

        for item in section.get("children", []):

            literal, new_format = _extract_text_and_format_from_ast(item)

            if new_format:
                active_formats.add(new_format)

            if item["type"] == "html_inline" and literal == "</s>":
                # discard() instead of remove(): an unmatched closing tag
                # must not crash the whole conversion with KeyError
                active_formats.discard(("s", ))
                literal = ""

            if item["type"] == "softbreak":
                literal = "\n"

            if literal:
                if active_formats:
                    notion.append(
                        [literal, [list(f) for f in sorted(active_formats)]])
                else:
                    notion.append([literal])

            # in the ast format, code blocks are meant
            # to be immediately self-closing
            active_formats.discard(("c", ))

    # remove any trailing newlines from automatic closing paragraph markers
    if notion:
        notion[-1][0] = notion[-1][0].rstrip("\n")

    # consolidate any adjacent text blocks with identical styles
    consolidated = []
    for item in notion:
        if consolidated and _get_format(
                consolidated[-1], as_set=True) == _get_format(item,
                                                              as_set=True):
            consolidated[-1][0] += item[0]
        elif item[0]:
            # entries with empty text never start a new run
            consolidated.append(item)

    return _cleanup_dashes(consolidated)
コード例 #5
0
ファイル: euraxess.py プロジェクト: CreativeOthman/hipeac
    def add_jobs(self, *, queryset) -> None:
        """Append one ``job-opportunity`` element to ``self.root`` per job.

        Only jobs in ``queryset`` with ``add_to_euraxess=True`` are
        exported. Each job element receives, in this order, the child
        sections ``job-id``, ``description``, ``additional-information``,
        ``eu-funding``, ``work-location``, ``hiring-org-inst`` and, when a
        website link or e-mail address is available, ``application-details``.

        Args:
            queryset: Keyword-only. Must support ``filter``,
                ``select_related`` and ``prefetch_related``
                (presumably a Django queryset of jobs; confirm with caller).

        Raises:
            KeyError: If ``job.institution.type`` is not one of the
                ``Institution`` constants mapped below.
        """
        metadata = MetadataListFieldWithEuraxess()
        md_parser = Parser()
        md_renderer = HtmlRenderer()

        # Related objects read inside the loop are requested up front via
        # select_related / prefetch_related.
        for job in (queryset.filter(add_to_euraxess=True).select_related(
                "institution").prefetch_related("links", "institution__links",
                                                "project__programme")):
            el = etree.SubElement(self.root, "job-opportunity")
            el.set("organisationIDKey", self.organisation_id_key)
            el.set("lastmodifieddate", date_filter(job.updated_at, "c"))
            etree.SubElement(el, "job-id").text = str(job.id)

            # description: title, rendered description, research fields,
            # researcher profiles, contract/status placeholders and deadline

            desc = etree.SubElement(el, "description")
            etree.SubElement(desc, "job-title").text = job.title
            # The job description is parsed by md_parser and the result is
            # rendered by md_renderer before being stored as element text.
            etree.SubElement(desc,
                             "job-description").text = md_renderer.render(
                                 md_parser.parse(job.description))

            # One research-field element per topic; a generic
            # "Computer science / Other" entry is used when there are none.
            job_topics = self.parse_topics(
                metadata.to_representation(job.topics))
            if len(job_topics) == 0:
                field = etree.SubElement(desc, "research-field")
                etree.SubElement(
                    field, "main-research-field").text = "Computer science"
                etree.SubElement(field, "sub-research-field").text = "Other"
            else:
                for topic in job_topics:
                    field = etree.SubElement(desc, "research-field")
                    etree.SubElement(
                        field, "main-research-field").text = "Computer science"
                    etree.SubElement(field, "sub-research-field").text = topic

            # Career levels go through the same parse_topics helper as topics.
            researcher_profiles = self.parse_topics(
                metadata.to_representation(job.career_levels))
            if len(researcher_profiles) == 0:
                etree.SubElement(
                    desc, "researcher-profile"
                ).text = "Established Researcher (R3)"  # TODO: check default
            else:
                for profile in researcher_profiles:
                    etree.SubElement(desc, "researcher-profile").text = profile

            # Contract type and job status are fixed strings, not job data.
            etree.SubElement(desc, "type-of-contract").text = "To be defined"
            etree.SubElement(desc, "job-status").text = "Negotiable"
            # job.deadline is combined with datetime.min.time() so that a
            # full datetime is passed to date_filter.
            etree.SubElement(desc, "application-deadline").text = date_filter(
                datetime.combine(job.deadline, datetime.min.time()), "c")

            # additional-information: absolute URL of the job page

            extra_info = etree.SubElement(el, "additional-information")
            etree.SubElement(
                extra_info, "info-website"
            ).text = f"https://www.hipeac.net{job.get_absolute_url()}"

            # eu-funding: the programme's euraxess_value, or "No" when the
            # job has no project or the project has no programme

            eu_funding = etree.SubElement(el, "eu-funding")
            if job.project and job.project.programme:
                etree.SubElement(eu_funding, "framework-programme"
                                 ).text = job.project.programme.euraxess_value
            else:
                etree.SubElement(eu_funding, "framework-programme").text = "No"

            # work-location: positions, institution name, country and city
            # taken from the job itself

            location = etree.SubElement(el, "work-location")
            etree.SubElement(location,
                             "nr-job-positions").text = str(job.positions)
            etree.SubElement(
                location,
                "job-organisation-institute").text = job.institution.name
            etree.SubElement(location, "job-country").text = job.country.name
            etree.SubElement(location, "job-city").text = job.location

            # hiring-org-inst: details of the hiring institution

            # Maps the institution type constant to the label written to
            # the XML; an unmapped type raises KeyError.
            organisation_type = {
                Institution.UNIVERSITY: "Higher Education Institute",
                Institution.LAB: "Research Laboratory",
                Institution.INNOVATION: "Public Research Institution",
                Institution.INDUSTRY: "Large Company",
                Institution.SME: "Small Medium Enterprise, Start-up",
                Institution.OTHER: "Other",
            }[job.institution.type]

            institution = etree.SubElement(el, "hiring-org-inst")
            etree.SubElement(
                institution,
                "organisation-institute").text = job.institution.name
            etree.SubElement(
                institution,
                "organisation-institute-type").text = organisation_type
            etree.SubElement(institution,
                             "country").text = job.institution.country.name
            # City and e-mail are optional; every institution link is
            # emitted as its own "website" element.
            if job.institution.location:
                etree.SubElement(institution,
                                 "city").text = job.institution.location
            if job.institution.recruitment_email:
                etree.SubElement(
                    institution,
                    "e-mail").text = job.institution.recruitment_email
            for link in job.institution.links.all():
                etree.SubElement(institution, "website").text = link.url

            # application-details: a website link takes precedence over
            # the job e-mail; with neither, the section is omitted

            application_website = None

            # If several links have type "website", the last one wins.
            for link in job.links.all():
                if link.type == "website":
                    application_website = link.url

            if application_website:
                application_details = etree.SubElement(el,
                                                       "application-details")
                etree.SubElement(application_details,
                                 "how-to-apply").text = "website"
                etree.SubElement(
                    application_details,
                    "application-website").text = application_website
            elif job.email:
                application_details = etree.SubElement(el,
                                                       "application-details")
                etree.SubElement(application_details,
                                 "how-to-apply").text = "e-mail"
                etree.SubElement(application_details,
                                 "application-email").text = job.email