def test_add_appinfo_element_present_in_second_of_two_appinfo(self):
        """Removing "attribute" leaves both appinfo elements empty.

        NOTE(review): the test name says "add", but the function exercised
        here is delete_appinfo_element -- confirm the intended name.
        """
        # Annotation with an empty appinfo followed by one holding <attribute>
        source_xsd = """
            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
                <xs:element name="root">
                    <xs:annotation>
                        <xs:appinfo></xs:appinfo>
                        <xs:appinfo><attribute>old</attribute></xs:appinfo>
                    </xs:annotation>
                </xs:element>
            </xs:schema>
        """
        element_xpath = "xs:element"

        result_xsd = delete_appinfo_element(source_xsd, element_xpath,
                                            "attribute")

        expected_xsd = """
            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
                <xs:element name="root">
                    <xs:annotation><xs:appinfo/><xs:appinfo/></xs:annotation>
                </xs:element>
            </xs:schema>
        """

        # Round-trip both documents through XSDTree so that each side of the
        # comparison has gone through the same parse/serialize steps
        normalized_result = XSDTree.tostring(XSDTree.fromstring(result_xsd))
        normalized_expected = XSDTree.tostring(
            XSDTree.fromstring(expected_xsd))

        self.assertEqual(normalized_result, normalized_expected)
Exemple #2
0
def set_xsd_element_occurrences(xsd_string, xpath, min_occurs, max_occurs):
    """Set the minOccurs and maxOccurs attributes of the element at xpath.

    Args:
        xsd_string: XSD content
        xpath: path to the element; every "<default prefix>:" it contains is
            replaced by LXML_SCHEMA_NAMESPACE before the lookup
        min_occurs: value stored in the "minOccurs" attribute
        max_occurs: value stored in the "maxOccurs" attribute

    Returns:
        The updated XSD, as produced by XSDTree.tostring

    """
    tree = XSDTree.build_tree(xsd_string)

    # rewrite the prefixed path into the namespaced form used by find()
    prefix = get_default_prefix(get_namespaces(xsd_string))
    namespaced_xpath = xpath.replace(prefix + ":", LXML_SCHEMA_NAMESPACE)

    # update the occurrence attributes on the matched element
    target = tree.find(namespaced_xpath)
    target.attrib["minOccurs"] = min_occurs
    target.attrib["maxOccurs"] = max_occurs

    return XSDTree.tostring(tree)
Exemple #3
0
def rename_xsd_element(xsd_string, xpath, new_name):
    """Change the "name" attribute of the element found at xpath.

    Args:
        xsd_string: XSD content
        xpath: path to the element; every "<default prefix>:" it contains is
            replaced by LXML_SCHEMA_NAMESPACE before the lookup
        new_name: value stored in the "name" attribute

    Returns:
        The updated XSD, as produced by XSDTree.tostring

    """
    tree = XSDTree.build_tree(xsd_string)

    # rewrite the prefixed path into the namespaced form used by find()
    prefix = get_default_prefix(get_namespaces(xsd_string))
    namespaced_xpath = xpath.replace(prefix + ":", LXML_SCHEMA_NAMESPACE)

    target = tree.find(namespaced_xpath)
    target.attrib["name"] = new_name

    return XSDTree.tostring(tree)
Exemple #4
0
def get_content_by_xpath(xml_string, xpath, namespaces=None):
    """Evaluate an xpath on an XML string and return the matches as strings.

    Args:
        xml_string: XML content to search
        xpath: xpath expression to evaluate
        namespaces: passed as the ``namespaces`` argument of the xpath call

    Returns:
        List with one entry per match: the match itself when it is already
        a str, otherwise its XSDTree.tostring serialization

    """
    matches = XSDTree.build_tree(xml_string).xpath(xpath,
                                                   namespaces=namespaces)

    return [
        match if isinstance(match, str) else XSDTree.tostring(match)
        for match in matches
    ]
Exemple #5
0
def delete_xsd_element(xsd_string, xpath):
    """Remove the element found at xpath from the XSD.

    Args:
        xsd_string: XSD content
        xpath: path to the element; every "<default prefix>:" it contains is
            replaced by LXML_SCHEMA_NAMESPACE before the lookup

    Returns:
        The updated XSD, as produced by XSDTree.tostring

    """
    tree = XSDTree.build_tree(xsd_string)

    # rewrite the prefixed path into the namespaced form used by find()
    prefix = get_default_prefix(get_namespaces(xsd_string))
    namespaced_xpath = xpath.replace(prefix + ":", LXML_SCHEMA_NAMESPACE)

    # detach the matched element from its parent
    doomed = tree.find(namespaced_xpath)
    parent = doomed.getparent()
    parent.remove(doomed)

    return XSDTree.tostring(tree)
Exemple #6
0
def check_xml_file_is_valid(data, request=None):
    """Validate the XML content of a data against the content of its template.

    Args:
        data: object exposing ``xml_content`` and ``template`` (whose
            ``content`` is the schema)
        request: forwarded to validate_xml_data

    Returns:
        True when validate_xml_data reports no error

    Raises:
        exceptions.XMLError: the XML content cannot be built into a tree, or
            validate_xml_data returned an error
        exceptions.XSDError: the template content cannot be built into a tree

    """
    template = data.template

    # parse the document
    try:
        xml_tree = XSDTree.build_tree(data.xml_content)
    except Exception as xml_exception:
        raise exceptions.XMLError(str(xml_exception))

    # parse the schema
    try:
        xsd_tree = XSDTree.build_tree(template.content)
    except Exception as xsd_exception:
        raise exceptions.XSDError(str(xsd_exception))

    validation_error = validate_xml_data(xsd_tree, xml_tree, request=request)
    if validation_error is None:
        return True

    raise exceptions.XMLError(validation_error)
Exemple #7
0
def get_hash(xml_string):
    """Hash an XML string after stripping non-significant content.

    Blank text, comments and processing instructions are dropped by the
    parser, and every XML Schema annotation element is removed, before the
    document is converted to a dict and hashed.

    NOTE(review): the parser is installed through etree.set_default_parser
    and is not restored afterwards -- confirm this is intended.

    Args:
        xml_string (str): XML String to hash

    Returns:
        Value returned by hash_dict for the cleaned document (described as
        a SHA-1 hash string in the original documentation -- TODO confirm)
    """
    # Install a parser that discards blank text, comments and PIs
    etree.set_default_parser(parser=etree.XMLParser(remove_blank_text=True,
                                                    remove_comments=True,
                                                    remove_pis=True))

    tree = XSDTree.build_tree(xml_string)

    # Drop every annotation element from the tree
    annotation_path = ".//{http://www.w3.org/2001/XMLSchema}annotation"
    for annotation in tree.findall(annotation_path):
        annotation.getparent().remove(annotation)

    # Hash the dict representation of the cleaned document
    as_dict = xmltodict.parse(XSDTree.tostring(tree), dict_constructor=dict)
    return hash_dict(as_dict)
    def test_generate_extension_with_single_child_attribute_returns_expected_json_dict(
        self,
    ):
        """generate_extension on "attribute/single" matches the ".reload" JSON."""
        fixture = join("attribute", "single")
        schema_tree = self.extension_data_handler.get_xsd(fixture)
        extension_path = (
            "/xs:schema/xs:element/xs:complexType/xs:simpleContent/xs:extension"
        )
        extension_element = schema_tree.xpath(
            extension_path, namespaces=self.namespaces)[0]

        # Serialize the fixture document, then load it with transform_to_xml
        # to obtain the tree passed as edit_data_tree
        document_tree = self.extension_data_handler.get_xml(fixture)
        edit_tree = XSDTree.transform_to_xml(XSDTree.tostring(document_tree))

        generated = self.parser.generate_extension(
            extension_element,
            schema_tree,
            full_path="/root",
            edit_data_tree=edit_tree,
            default_value="entry0",
        )

        # Compare with the stored expectation
        expected = self.extension_data_handler.get_json(fixture + ".reload")
        self.assertDictEqual(expected, generated)
    def test_add_appinfo_element_no_element_adds_it(self):
        """add_appinfo_element creates <attribute> inside an empty appinfo."""
        source_xsd = """
            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
            <xs:element name="root"><xs:annotation>
            <xs:appinfo></xs:appinfo></xs:annotation></xs:element>
            </xs:schema>
        """
        element_xpath = "xs:element"

        result_xsd = add_appinfo_element(source_xsd, element_xpath,
                                         "attribute", "value")

        expected_xsd = """
            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
                <xs:element name="root">
                    <xs:annotation>
                        <xs:appinfo><attribute>value</attribute></xs:appinfo>
                    </xs:annotation>
                </xs:element>
            </xs:schema>
        """

        # Round-trip both documents through XSDTree so that each side of the
        # comparison has gone through the same parse/serialize steps
        normalized_result = XSDTree.tostring(XSDTree.fromstring(result_xsd))
        normalized_expected = XSDTree.tostring(
            XSDTree.fromstring(expected_xsd))

        self.assertEqual(normalized_result, normalized_expected)
Exemple #10
0
    def test_reload_simple_content_basic(self):
        """generate_complex_type on "simple_content/basic" matches the ".reload" JSON."""
        fixture = join("simple_content", "basic")
        schema_tree = self.complex_type_data_handler.get_xsd(fixture)
        complex_type = schema_tree.xpath("/xs:schema/xs:complexType",
                                         namespaces=self.namespaces)[0]

        # Serialize the fixture document, then load it with transform_to_xml
        # to obtain the tree passed as edit_data_tree
        document_tree = self.complex_type_data_handler.get_xml(fixture)
        edit_tree = XSDTree.transform_to_xml(XSDTree.tostring(document_tree))

        # Text of the document root, used as the default value
        root_text = document_tree.xpath("/root",
                                        namespaces=self.namespaces)[0].text

        generated = self.parser.generate_complex_type(
            complex_type,
            schema_tree,
            full_path="/root",
            edit_data_tree=edit_tree,
            default_value=root_text,
        )

        # Compare with the stored expectation
        expected = self.complex_type_data_handler.get_json(
            "%s.reload" % fixture)
        self.assertDictEqual(generated, expected)
 def test_iterparse_method_with_unicode(self):
     """Call iterparse on a document whose tag name uses a unicode escape.

     There is no assertion: the test only requires that the call does not
     raise.
     """
     events = ("end", )
     document = """
         <xs:schema xmlns:xs='http://www.w3.org/2001/XMLSchema'>
             <\u0192-root><test></test></\u0192-root>
         </xs:schema>
     """
     XSDTree.iterparse(document, events)
Exemple #12
0
def validate_form(request):
    """Validate data present in the form via XML validation.

    The curate data structure identified by request.POST['id'] is rendered
    to XML and validated against the content of its template. Validation
    errors, or the message of any exception raised on the way, are stored
    under the 'errors' key of a response dict.

    Args:
        request: request whose POST data holds the curate data structure id

    Returns:
        None. NOTE(review): response_dict is filled in but never returned in
        the visible code -- confirm whether a trailing return statement
        is missing.

    """
    response_dict = {}
    try:
        # get curate data structure
        curate_data_structure_id = request.POST['id']
        curate_data_structure = curate_data_structure_api.get_by_id(
            curate_data_structure_id)

        # generate the XML
        xml_data = render_xml(
            curate_data_structure.data_structure_element_root)

        # build trees
        xsd_tree = XSDTree.build_tree(curate_data_structure.template.content)
        xml_tree = XSDTree.build_tree(xml_data)

        # validate XML document
        errors = validate_xml_data(xsd_tree, xml_tree)

        # FIXME: test xmlParseEntityRef exception: use of & < > forbidden
        if errors is not None:
            response_dict['errors'] = errors

    except Exception as e:
        # "except Exception, e" is a syntax error on Python 3, and Python 3
        # exceptions have no ``message`` attribute: use it when it exists,
        # fall back to str(e) otherwise.
        message = getattr(e, 'message', None)
        if message is None:
            message = str(e)
        response_dict['errors'] = message.replace('"', '\'')
Exemple #13
0
def download_xml_build_req(request):
    """ Download xml of the building request.

    The XML string is read from the 'xmlStringOAIPMH' session key and pretty
    printed when it can be parsed; otherwise it is sent as stored.

    Args:
        request: request whose session may hold 'xmlStringOAIPMH'

    Returns:
        XML file to download, or an HttpResponseBadRequest when the session
        does not hold the XML string.

    """
    if 'xmlStringOAIPMH' not in request.session:
        return HttpResponseBadRequest(
            'An error occurred. Please reload the page and try again.')

    # We retrieve the XML file in session
    xml_string = request.session['xmlStringOAIPMH']
    try:
        xml_tree = XSDTree.build_tree(xml_string)
        xml_string_encoded = XSDTree.tostring(xml_tree, pretty=True)
    except Exception:
        # Best effort: fall back to the raw string when it cannot be
        # parsed or serialized. ``except Exception`` (instead of a bare
        # ``except``) lets KeyboardInterrupt / SystemExit propagate.
        xml_string_encoded = xml_string

    # Get the date to append it to the file title
    i = datetime.datetime.now()
    title = "OAI_PMH_BUILD_REQ_%s_.xml" % i.isoformat()
    file_obj = StringIO(xml_string_encoded)

    # Return the XML file
    response = HttpResponse(FileWrapper(file_obj),
                            content_type='application/xml')
    response['Content-Disposition'] = 'attachment; filename=' + title

    return response
Exemple #14
0
def insert_element_built_in_type(xsd_string, xpath, element_type_name):
    """Append an element of a built-in type under the element at xpath.

    The new element uses element_type_name as its name and
    "<default prefix>:<element_type_name>" as its type.

    Args:
        xsd_string: xsd string
        xpath: xpath where to insert the element
        element_type_name: name of the type to insert

    Returns:
        The updated XSD, as produced by XSDTree.tostring

    Raises:
        XMLError: validate_xml_schema reported an error on the updated tree

    """
    tree = XSDTree.build_tree(xsd_string)

    # rewrite the prefixed path into the namespaced form used by find()
    prefix = get_default_prefix(get_namespaces(xsd_string))
    namespaced_xpath = xpath.replace(prefix + ":", LXML_SCHEMA_NAMESPACE)

    # description of the element to insert
    element_tag = "{}element".format(LXML_SCHEMA_NAMESPACE)
    element_attributes = {
        'type': prefix + ':' + element_type_name,
        'name': element_type_name,
    }
    tree.find(namespaced_xpath).append(
        XSDTree.create_element(element_tag, attrib=element_attributes))

    # the schema must still validate after the insertion
    validation_error = validate_xml_schema(tree)
    if validation_error is not None:
        raise XMLError(validation_error)

    return XSDTree.tostring(tree)
Exemple #15
0
def _update_attribute(xsd_string, xpath, attribute, value=None):
    """Set or delete an attribute of the element found at xpath.

    Args:
        xsd_string: XSD content
        xpath: xpath of the element to update
        attribute: name of the attribute to update
        value: value to set; when None the attribute is deleted if present

    Returns:
        The updated XSD, as produced by XSDTree.tostring

    """
    tree = XSDTree.build_tree(xsd_string)
    target = get_element_by_xpath(tree, xpath, get_namespaces(xsd_string))

    if value is None:
        # No value given: drop the attribute when the element carries it
        if attribute in target.attrib:
            del target.attrib[attribute]
    else:
        # Create or overwrite the attribute
        target.attrib[attribute] = value

    return XSDTree.tostring(tree)
Exemple #16
0
def _update_appinfo_element(xsd_string, xpath, appinfo_name, value=None):
    """Add or delete an appinfo child of the element found at xpath.

    Args:
        xsd_string: XSD content
        xpath: xpath to element to update
        appinfo_name: name of the appinfo to update
        value: value handed to add_appinfo_child_to_element; when None,
            delete_appinfo_child_from_element is called instead

    Returns:
        The updated XSD, as produced by XSDTree.tostring

    """
    tree = XSDTree.build_tree(xsd_string)
    target = get_element_by_xpath(tree, xpath, get_namespaces(xsd_string))

    if value is None:
        # No value given: request the deletion of the appinfo
        delete_appinfo_child_from_element(target, appinfo_name)
    else:
        # A value is given: hand it to the add helper
        add_appinfo_child_to_element(target, appinfo_name, value)

    return XSDTree.tostring(tree)
    def test_delete_appinfo_element_removed_if_exists(self):
        """delete_appinfo_element removes <attribute> and leaves an empty appinfo."""
        source_xsd = """
            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
                <xs:element name="root">
                    <xs:annotation>
                        <xs:appinfo><attribute>value</attribute></xs:appinfo>
                    </xs:annotation>
                </xs:element>
            </xs:schema>
        """
        element_xpath = "xs:element"
        appinfo_name = "attribute"

        result_xsd = delete_appinfo_element(source_xsd, element_xpath,
                                            appinfo_name)

        expected_xsd = """
            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
                <xs:element name="root">
                    <xs:annotation><xs:appinfo/></xs:annotation>
                </xs:element>
            </xs:schema>
        """

        # Round-trip both documents through XSDTree so that each side of the
        # comparison has gone through the same parse/serialize steps
        normalized_result = XSDTree.tostring(XSDTree.fromstring(result_xsd))
        normalized_expected = XSDTree.tostring(
            XSDTree.fromstring(expected_xsd))

        self.assertEqual(normalized_result, normalized_expected)
def set_status(data, status, user):
    """ Write the status on the root of the data's XML content and upsert it.

    Args:
        data: data whose xml_content is updated
        status: value stored in the root element's 'status' attribute
        user: passed to data_api.upsert

    Returns: value returned by data_api.upsert

    Raises:
        exceptions.ModelError: status is DataStatus.DELETED while the data
            has no workspace, or its workspace's is_public is False

    """
    if status == DataStatus.DELETED:
        # the 'Deleted' status is refused unless the data is published
        workspace_missing_or_private = (data.workspace is None or
                                        data.workspace.is_public is False)
        if workspace_missing_or_private:
            raise exceptions.ModelError(
                "the " + get_data_label() +
                " should be published if the targeted status is 'Deleted'")

    # write the status on the root element of the document
    tree = XSDTree.build_tree(data.xml_content)
    tree.getroot().attrib['status'] = status

    # store the updated document and save the data
    data.xml_content = XSDTree.tostring(tree)
    return data_api.upsert(data, user)
 def test_output_without_method_html_default(self):
     """get_extension gives "html" for a stylesheet holding an <html> element."""
     stylesheet = (
         '<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0" xmlns:rsm="http://schema.nist.gov/xml/res-md/1.0wd-02-2017">'
         '<html lang="en"><head></head><body></body></html> </xsl:stylesheet>'
     )
     detected_extension = XSDTree.get_extension(
         XSDTree.build_tree(stylesheet))
     self.assertEqual(detected_extension, "html")
 def test_output_without_method_xml_default(self):
     """get_extension gives "xml" when xsl:output carries no method attribute."""
     stylesheet = (
         '<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0" xmlns:rsm="http://schema.nist.gov/xml/res-md/1.0wd-02-2017">'
         '<xsl:output xsl="http://www.w3.org/1999/XSL/Transform" indent="yes" encoding="UTF-8" /> </xsl:stylesheet>'
     )
     detected_extension = XSDTree.get_extension(
         XSDTree.build_tree(stylesheet))
     self.assertEqual(detected_extension, "xml")
    def test_iterparse_method_without_decoded_symbols(self):
        """iterparse raises XMLError on a document with a literal non-ASCII tag name."""
        events = ("end", )
        document = """
            <xs:schema xmlns:xs='http://www.w3.org/2001/XMLSchema'>
                <ƒ-root><test></test></ƒ-root>
            </xs:schema>
        """

        with self.assertRaises(XMLError):
            XSDTree.iterparse(document, events)
    def test_remove_no_annotations_returns_same_value(self):
        """remove_annotations leaves a schema without annotations unchanged."""
        xsd_string = '<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">' \
                     '<xs:element name="integer" type="xs:integer"/></xs:schema>'

        xsd_tree = XSDTree.build_tree(xsd_string)
        remove_annotations(xsd_tree)
        result_xsd_string = XSDTree.tostring(xsd_tree)

        # assertEqual (rather than assertTrue(a == b)) so that a failure
        # reports both values instead of "False is not true"
        self.assertEqual(xsd_string, result_xsd_string)
Exemple #23
0
def sanitize(input_value):
    """Recursively sanitize the strings found in the input.

    Lists are sanitized item by item, dicts key by key and value by value,
    ints and floats are returned untouched. A string is first re-serialized
    through XSDTree when it parses as XML; the result is then sanitized as
    JSON when it loads as JSON, and escaped otherwise. Any other value is
    converted with str() and escaped.

    :param input_value: value to sanitize
    :return: sanitized value
    """
    kind = type(input_value)

    # containers: sanitize their content
    if kind is list:
        return [sanitize(element) for element in input_value]

    if kind is dict:
        return {
            sanitize(name): sanitize(content)
            for name, content in list(input_value.items())
        }

    # numbers are returned as they are
    if kind is int or kind is float:
        return input_value

    # anything that is not a string: default sanitizing
    if kind is not str:
        return escape(str(input_value))

    # strings
    try:
        # XML cleaning
        cleaning_parser = etree.XMLParser(remove_blank_text=True)
        parsed_xml = XSDTree.fromstring(input_value, parser=cleaning_parser)

        input_value = XSDTree.tostring(parsed_xml)
    except XMLError as xml_error:
        # input is not XML, only log it
        logger.warning("sanitize threw an exception: {0}".format(
            str(xml_error)))

    finally:
        try:
            # JSON content is sanitized recursively and dumped back
            clean_value = json.dumps(sanitize(json.loads(input_value)))
        except ValueError:
            # not JSON: escape the string
            clean_value = escape(input_value)

    return clean_value
    def test_remove_annotations_returns_tree_without_annotations(self):
        """remove_annotations strips the annotation element from the schema."""
        xsd_string = '<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">' \
                     '<xs:annotation><xs:appinfo/></xs:annotation>' \
                     '<xs:element name="integer" type="xs:integer"/></xs:schema>'

        expected_xsd_string = '<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">' \
                              '<xs:element name="integer" type="xs:integer"/></xs:schema>'

        xsd_tree = XSDTree.build_tree(xsd_string)
        remove_annotations(xsd_tree)
        result_xsd_string = XSDTree.tostring(xsd_tree)

        # assertEqual (rather than assertTrue(a == b)) so that a failure
        # reports both values instead of "False is not true"
        self.assertEqual(expected_xsd_string, result_xsd_string)
Exemple #25
0
def format_content_xml(xml_string):
    """Return the XML content serialized with pretty printing.

    Args:
        xml_string: XML content to format

    Returns:
        Result of XSDTree.tostring(..., pretty=True) on the parsed content

    Raises:
        exceptions.XMLError: parsing or serializing the content failed

    """
    try:
        pretty_xml = XSDTree.tostring(XSDTree.build_tree(xml_string),
                                      pretty=True)
    except Exception:
        raise exceptions.XMLError("Content is not well formatted XML.")

    return pretty_xml
Exemple #26
0
def insert_element_type(xsd_string, xpath, type_content, element_type_name,
                        include_url, request):
    """Insert an element of the given type in the XSD and validate the result.

    Args:
        xsd_string: xsd string
        xpath: xpath where to insert the element
        type_content: string content of the type to insert
        element_type_name: name of the type
        include_url: url used to reference the type in schemaLocation
        request: request, forwarded to validate_xml_schema

    Returns:
        The updated XSD, as produced by XSDTree.tostring

    Raises:
        XMLError: validate_xml_schema reported an error on the updated tree

    """
    updated_tree = _insert_element_type(xsd_string, xpath, type_content,
                                        element_type_name, include_url)

    # refuse the insertion when the resulting schema does not validate
    validation_error = validate_xml_schema(updated_tree, request=request)
    if validation_error is not None:
        raise XMLError(validation_error)

    return XSDTree.tostring(updated_tree)
Exemple #27
0
def is_well_formed_xml(xml_string):
    """Tell whether XSDTree.build_tree accepts the string.

    Args:
        xml_string: content to check

    Returns:
        True when the tree can be built, False when building it raises

    """
    try:
        XSDTree.build_tree(xml_string)
    except Exception:
        well_formed = False
    else:
        well_formed = True

    return well_formed
Exemple #28
0
def get_template_with_server_dependencies(xsd_string, dependencies, request=None):
    """Return the template once its dependencies are updated and validated.

    Args:
        xsd_string: XSD content
        dependencies: passed to update_dependencies
        request: forwarded to validate_xml_schema

    Returns:
        The updated XSD, as produced by XSDTree.tostring

    Raises:
        exceptions.XSDError: the dependency update or the validation raised,
            or the validation returned an error

    """
    # point the includes/imports to the given dependencies
    try:
        tree = update_dependencies(xsd_string, dependencies)
    except Exception:
        raise exceptions.XSDError("Something went wrong during dependency update.")

    # validate the updated schema
    try:
        validation_error = validate_xml_schema(tree, request=request)
    except Exception:
        raise exceptions.XSDError("Something went wrong during XSD validation.")

    # the error is reported with its single quotes stripped
    if validation_error is not None:
        raise exceptions.XSDError(validation_error.replace("'", ""))

    return XSDTree.tostring(tree)
Exemple #29
0
def get_record_elt(xml_elt, metadata_prefix):
    """Build a dict describing the Record created from xml_elt.

    Args:
        xml_elt: value handed to the Record constructor.
        metadata_prefix: Metadata Prefix, stored as is under "metadataPrefix"

    Returns:
        Dict with the keys "identifier", "datestamp", "deleted", "sets",
        "metadataPrefix", "metadata" (None for a deleted record) and "raw".

    """
    record = Record(xml_elt)

    # NOTE(review): the path ends with "/", which presumably selects what
    # sits under the OAI-PMH <metadata> element -- confirm.
    metadata_path = ".//{http://www.openarchives.org/OAI/2.0/}metadata/"

    return {
        "identifier": record.header.identifier,
        "datestamp": record.header.datestamp,
        "deleted": record.deleted,
        "sets": record.header.setSpecs,
        "metadataPrefix": metadata_prefix,
        "metadata": (None if record.deleted else XSDTree.tostring(
            record.xml.find(metadata_path))),
        "raw": record.raw,
    }
Exemple #30
0
def update_dependencies(xsd_string, dependencies):
    """Update the schemaLocation of the schema's includes and imports.

    For each (schema location, dependency id) pair whose id is not None,
    every include/import found directly under the tree root whose
    schemaLocation equals the schema location is rewritten to
    _get_schema_location_uri(dependency id).

    Args:
        xsd_string: XSD content
        dependencies: dict mapping a schemaLocation value to a dependency id
            (entries with a None id are ignored)

    Returns:
        The updated tree (not a string)

    """
    # build the tree
    xsd_tree = XSDTree.build_tree(xsd_string)
    # get the imports
    xsd_imports = xsd_tree.findall(
        "{}import".format(xml_utils_constants.LXML_SCHEMA_NAMESPACE)
    )
    # get the includes
    xsd_includes = xsd_tree.findall(
        "{}include".format(xml_utils_constants.LXML_SCHEMA_NAMESPACE)
    )
    # includes are processed before imports, for every dependency
    xsd_references = list(xsd_includes) + list(xsd_imports)

    for schema_location, dependency_id in dependencies.items():
        if dependency_id is None:
            continue

        for xsd_reference in xsd_references:
            # schemaLocation is optional on xs:import: a reference without
            # it cannot match and must not raise a KeyError
            if "schemaLocation" not in xsd_reference.attrib:
                continue

            if schema_location == xsd_reference.attrib["schemaLocation"]:
                xsd_reference.attrib["schemaLocation"] = _get_schema_location_uri(
                    dependency_id
                )
    return xsd_tree