Beispiel #1
0
def get_jats_abstract(abstract):
    """Return the abstract markup rewritten to use ``jats:`` prefixed tags.

    The input is first passed through the removal and escaping helpers,
    then each supported tag name is renamed to its ``jats:`` counterpart,
    and finally ``rid`` attributes are removed.
    """
    jats_prefix = "jats:"

    # preparation: drop object-id, the abstract wrapper and comment tags,
    # then escape ampersands and unmatched angle brackets
    content = etoolsutils.remove_tag_and_text("object-id", abstract)
    content = etoolsutils.remove_tag("abstract", content)
    content = utils_html.remove_comment_tags(content)
    content = etoolsutils.escape_ampersand(content)
    content = etoolsutils.escape_unmatched_angle_brackets(
        content, utils.allowed_tags()
    )

    # tags renamed with replace_jats_tag, first group
    for tag_name in ("sec", "related-object", "title"):
        content = replace_jats_tag(tag_name, jats_prefix + tag_name, content)

    # tags renamed with eautils.replace_tags
    for tag_name in ("p", "italic", "bold", "underline", "sub", "sup", "sc"):
        content = eautils.replace_tags(
            content, tag_name, jats_prefix + tag_name
        )

    # tags renamed with replace_jats_tag, second group
    for tag_name in ("inline-formula", "ext-link", "xref"):
        content = replace_jats_tag(tag_name, jats_prefix + tag_name, content)

    # remove rid attributes
    return remove_tag_attr("rid", content)
def convert_inline_tags(original_string):
    """Escape the string, then rename italic, bold and underline tags.

    Ampersands and unmatched angle brackets are escaped first (using the
    tag list from ``utils.allowed_tags()``); afterwards ``italic``, ``bold``
    and ``underline`` tags are replaced with ``i``, ``b`` and ``u``.
    """
    converted = etoolsutils.escape_unmatched_angle_brackets(
        etoolsutils.escape_ampersand(original_string), utils.allowed_tags()
    )
    # order matches the sequence in which the replacements are applied
    tag_mapping = (
        ("italic", "i"),
        ("bold", "b"),
        ("underline", "u"),
    )
    for from_tag, to_tag in tag_mapping:
        converted = eautils.replace_tags(converted, from_tag, to_tag)
    return converted
 def test_escape_unmatched_angle_brackets(self, value, expected):
     """
     Check extra unmatched angle bracket examples: escaping value with the
     allowed XML tag fragments must produce expected
     """
     actual = utils.escape_unmatched_angle_brackets(
         value, allowed_xml_tag_fragments()
     )
     self.assertEqual(actual, expected)
Beispiel #4
0
def get_basic_abstract(abstract):
    """Return a simplified abstract: inline tags stripped, p tags kept.

    After the removal and escaping helpers have run, sec tags are converted
    and the remaining tags are cleaned, except for the ``p`` and ``mml:``
    fragments listed below. The ``p`` tags are then renamed to ``jats:p``.
    """
    # tag fragments that tags.clean_tags is told to leave alone
    keep_fragments = ["<p>", "</p>", "<mml:", "</mml:"]

    basic = etoolsutils.remove_tag_and_text("object-id", abstract)
    for wrapper_tag in ("related-object", "abstract"):
        basic = etoolsutils.remove_tag(wrapper_tag, basic)
    basic = utils_html.remove_comment_tags(basic)
    basic = etoolsutils.escape_unmatched_angle_brackets(
        etoolsutils.escape_ampersand(basic), utils.allowed_tags()
    )
    basic = convert_sec_tags(basic)
    basic = tags.clean_tags(basic, do_not_clean=keep_fragments)
    return eautils.replace_tags(basic, "p", "jats:p")
Beispiel #5
0
def convert_to_xml_string(string):
    """
    Prepare an input string containing escaped tags and special
    characters before it is added to an article object, by running
    it through a fixed series of conversion functions
    """
    converted = decode_brackets(entity_to_unicode(string))

    # applied in this order; both i and em end up as italic
    tag_renames = (
        ("i", "italic"),
        ("u", "underline"),
        ("b", "bold"),
        ("em", "italic"),
    )
    for old_tag, new_tag in tag_renames:
        converted = eautils.replace_tags(converted, old_tag, new_tag)

    return etoolsutils.escape_unmatched_angle_brackets(
        converted, allowed_tags()
    )
def add_clean_tag(
    parent,
    tag_name,
    original_string,
    namespaces=REPARSING_NAMESPACES,
    attributes=None,
    attributes_text="",
):
    """Clean and escape the string, then add it as a new tag to the parent.

    The string goes through ``clean_tags``, ampersand escaping and
    unmatched angle bracket escaping (with no allowed tag list supplied)
    before ``xmlio.reparsed_tag`` builds the tag that is appended.
    """
    escaped_content = etoolsutils.escape_unmatched_angle_brackets(
        etoolsutils.escape_ampersand(clean_tags(original_string))
    )
    new_tag = xmlio.reparsed_tag(
        tag_name, escaped_content, namespaces, attributes_text
    )
    append_tag(parent, new_tag, attributes=attributes)
Beispiel #7
0
def escape_xml(xml_string):
    """Escape ampersands, then unmatched angle brackets, in the string.

    The tag fragments returned by ``allowed_xml_tag_fragments()`` are
    passed to the angle bracket escaping step as the whitelist.
    """
    ampersand_escaped = escape_ampersand(xml_string)
    whitelist = allowed_xml_tag_fragments()
    return escape_unmatched_angle_brackets(ampersand_escaped, whitelist)