def check_xml_file_is_valid(data, request=None):
    """Validate the XML content of a data object against its template.

    Args:
        data: object exposing ``xml_content`` and ``template``; the XSD is
            read from ``template.content``.
        request: forwarded unchanged to ``validate_xml_data``.

    Returns:
        bool: True when ``validate_xml_data`` reports no error.

    Raises:
        exceptions.XMLError: if the XML content cannot be parsed, or if
            ``validate_xml_data`` returns an error.
        exceptions.XSDError: if the template content cannot be parsed.
    """
    schema_source = data.template

    # The document is parsed before the schema, so a malformed document is
    # reported first.
    try:
        document_tree = XSDTree.build_tree(data.xml_content)
    except Exception as xml_exception:
        raise exceptions.XMLError(str(xml_exception))

    try:
        schema_tree = XSDTree.build_tree(schema_source.content)
    except Exception as xsd_exception:
        raise exceptions.XSDError(str(xsd_exception))

    validation_error = validate_xml_data(
        schema_tree, document_tree, request=request)
    if validation_error is None:
        return True
    raise exceptions.XMLError(validation_error)
def validate_form(request):
    """Validate data present in the form via XML validation.

    The curate data structure identified by ``request.POST['id']`` is
    rendered to XML and validated against the content of its template.
    Validation errors, or the message of any exception raised on the way,
    are stored under the ``'errors'`` key of the response dictionary.

    Args:
        request: HTTP request carrying the data structure id in POST.

    Returns:
        Nothing in the code visible here (see the review note at the end).
    """
    response_dict = {}
    try:
        # get curate data structure
        curate_data_structure_id = request.POST['id']
        curate_data_structure = curate_data_structure_api.get_by_id(
            curate_data_structure_id)

        # generate the XML
        xml_data = render_xml(
            curate_data_structure.data_structure_element_root)

        # build trees
        xsd_tree = XSDTree.build_tree(curate_data_structure.template.content)
        xml_tree = XSDTree.build_tree(xml_data)

        # validate XML document
        errors = validate_xml_data(xsd_tree, xml_tree)

        # FIXME: test xmlParseEntityRef exception: use of & < > forbidden
        if errors is not None:
            response_dict['errors'] = errors
    except Exception as e:
        # str(e) is available on every exception; the ``message`` attribute
        # is not (it no longer exists on Python 3 exceptions).
        message = str(e).replace('"', '\'')
        response_dict['errors'] = message
    # NOTE(review): response_dict is filled but never returned in the code
    # visible here - presumably an HTTP response wrapping it should follow;
    # confirm against the full view.
def insert_element_built_in_type(xsd_string, xpath, element_type_name):
    """Insert an element with a built-in type in an XSD string.

    Args:
        xsd_string: XSD content to modify.
        xpath: xpath of the node under which the element is appended,
            written with the default prefix of the schema.
        element_type_name: name of the built-in type; it is also used as
            the name of the new element.

    Returns:
        The serialized schema (output of ``XSDTree.tostring``) containing
        the new element.

    Raises:
        XMLError: if nothing is found at ``xpath``, or if the modified
            schema does not validate.
    """
    # build the dom tree of the schema being built
    xsd_tree = XSDTree.build_tree(xsd_string)
    # get namespaces information for the schema
    namespaces = get_namespaces(xsd_string)
    # get the default namespace
    default_prefix = get_default_prefix(namespaces)

    # find() is given the schema namespace constant in place of the
    # "prefix:" notation used by the incoming xpath
    xpath = xpath.replace(default_prefix + ":", LXML_SCHEMA_NAMESPACE)
    type_name = default_prefix + ':' + element_type_name

    parent_element = xsd_tree.find(xpath)
    if parent_element is None:
        # find() yields None when nothing matches; fail with an explicit
        # error rather than an AttributeError on None
        raise XMLError("Unable to find an element at the given xpath.")

    parent_element.append(
        XSDTree.create_element(
            "{}element".format(LXML_SCHEMA_NAMESPACE),
            attrib={'type': type_name, 'name': element_type_name}))

    # validate XML schema
    error = validate_xml_schema(xsd_tree)

    # if errors, raise exception
    if error is not None:
        raise XMLError(error)

    return XSDTree.tostring(xsd_tree)
def get_hash(xml_string):
    """Get the hash of an XML string.

    Blank text, comments, processing instructions and XML Schema
    annotations are removed before hashing, so that two XML strings that
    only differ by those parts produce the same hash.

    Args:
        xml_string (str): XML string to hash.

    Returns:
        Value returned by ``hash_dict`` for the cleaned document (a SHA-1
        hash string according to the original documentation).
    """
    # Parser dropping everything that should not influence the hash
    hash_parser = etree.XMLParser(remove_blank_text=True,
                                  remove_comments=True,
                                  remove_pis=True)

    # The tree is built through the default parser, so the stripping parser
    # is installed only for the duration of the parsing and the previous
    # default is put back afterwards. Leaving it installed would silently
    # change every later parse relying on the default parser.
    previous_parser = etree.get_default_parser()
    etree.set_default_parser(parser=hash_parser)
    try:
        xml_tree = XSDTree.build_tree(xml_string)
    finally:
        etree.set_default_parser(parser=previous_parser)

    # Remove all annotations
    annotations = xml_tree.findall(
        ".//{http://www.w3.org/2001/XMLSchema}annotation")
    for annotation in annotations:
        annotation.getparent().remove(annotation)
    clean_xml_string = XSDTree.tostring(xml_tree)

    # Parse XML string into a plain dict
    xml_dict = xmltodict.parse(clean_xml_string, dict_constructor=dict)

    # Hash of the dict representation
    return hash_dict(xml_dict)
def save_type(request):
    """Save the current type in the database.

    Reads ``typeName`` and ``templateID`` from the POST data, and the
    schema being composed (``newXmlTemplateCompose``) and the included
    types (``includedTypesCompose``) from the session.

    Args:
        request: HTTP request.

    Returns:
        On the failure paths visible here: the result of
        ``_error_response`` (unknown existing type, invalid schema) or an
        ``HttpResponseBadRequest`` (type name already used).
    """
    try:
        type_name = request.POST['typeName']
        template_id = request.POST['templateID']
        xsd_string = request.session['newXmlTemplateCompose']

        response_dict = {}

        # can save as type if new type or from existing type
        if template_id != "new":
            try:
                # check if the type exists, raises exception otherwise
                type_api.get(template_id)
            except:
                # the type does not exist
                return _error_response(
                    "Unable to save an existing template as a type.")

        try:
            # remove root from tree if present
            xsd_string = composer_xml_utils.remove_single_root_element(
                xsd_string)
            # build xsd tree
            xsd_tree = XSDTree.build_tree(xsd_string)
            # validate the schema
            error = main_xml_utils.validate_xml_schema(xsd_tree)

            if error is not None:
                return _error_response('This is not a valid XML schema. ' +
                                       error)
        except Exception, e:
            return _error_response('This is not a valid XML schema. ' +
                                   e.message)

        # ids of the types included in the schema being composed
        dependencies = _get_dependencies_ids(
            request.session["includedTypesCompose"])

        try:
            # create type version manager
            type_version_manager = TypeVersionManager(title=type_name,
                                                      user=str(
                                                          request.user.id))
            # create type
            type_object = Type(filename=type_name,
                               content=xsd_string,
                               dependencies=dependencies)
            # save type in database
            type_version_manager_api.insert(type_version_manager, type_object)
        except exceptions.NotUniqueError, e:
            return HttpResponseBadRequest(
                "A type with the same name already exists. Please choose another name."
            )
    # NOTE(review): the handler of the outer ``try`` and the success response
    # are not visible in this excerpt - the definition is incomplete as
    # shown; complete it from the full source before relying on this block.
def download_xml_build_req(request):
    """Download the XML of the building request.

    The XML is read from the ``xmlStringOAIPMH`` session key and pretty
    printed when it can be parsed; otherwise it is sent back as stored.

    Args:
        request: HTTP request.

    Returns:
        HttpResponse serving the XML as an attachment, or an
        HttpResponseBadRequest when the session holds no XML.
    """
    if 'xmlStringOAIPMH' not in request.session:
        return HttpResponseBadRequest(
            'An error occurred. Please reload the page and try again.')

    # We retrieve the XML file in session
    xml_string = request.session['xmlStringOAIPMH']
    try:
        xml_tree = XSDTree.build_tree(xml_string)
        xml_string_encoded = XSDTree.tostring(xml_tree, pretty=True)
    except Exception:
        # Best effort: fall back to the raw content. ``Exception`` is used
        # rather than a bare ``except`` so that SystemExit and
        # KeyboardInterrupt are not swallowed.
        xml_string_encoded = xml_string

    # Get the date to append it to the file title
    i = datetime.datetime.now()
    title = "OAI_PMH_BUILD_REQ_%s_.xml" % i.isoformat()
    file_obj = StringIO(xml_string_encoded)

    # Return the XML file
    response = HttpResponse(FileWrapper(file_obj),
                            content_type='application/xml')
    response['Content-Disposition'] = 'attachment; filename=' + title
    return response
def rename_element(request):
    """Replace the current name of an element by a new name.

    The schema being composed is read from the ``newXmlTemplateCompose``
    session key; it is written back only if the renamed schema validates.

    Args:
        request: HTTP request; ``xpath`` and ``newName`` are read from POST.

    Returns:
        HttpResponse with an empty JSON object on success, the result of
        ``_error_response`` when the renamed schema is not valid, or an
        HttpResponseBadRequest carrying the exception message otherwise.
    """
    try:
        xpath = request.POST['xpath']
        new_name = request.POST['newName']
        xsd_string = request.session['newXmlTemplateCompose']

        # rename element
        xsd_string = composer_xml_utils.rename_xsd_element(
            xsd_string, xpath, new_name)

        # build xsd tree
        xsd_tree = XSDTree.build_tree(xsd_string)
        # validate the schema
        error = main_xml_utils.validate_xml_schema(xsd_tree)

        if error is not None:
            return _error_response("This is not a valid name.")

        # save the tree in the session
        request.session['newXmlTemplateCompose'] = xsd_string

        return HttpResponse(json.dumps({}),
                            content_type='application/javascript')
    except Exception as e:
        # str(e) is available on every exception; the ``message`` attribute
        # is not (it no longer exists on Python 3 exceptions).
        return HttpResponseBadRequest(str(e),
                                      content_type='application/javascript')
def update_dependencies(xsd_string, dependencies):
    """Update the dependencies of a schema with the given dependencies.

    Every ``include`` and ``import`` found directly under the root whose
    ``schemaLocation`` is a key of ``dependencies`` with a non-None value
    gets its ``schemaLocation`` replaced by
    ``_get_schema_location_uri(dependency_id)``.

    Args:
        xsd_string: XSD content.
        dependencies: dict mapping a schema location to a dependency id
            (or None when the location must be left untouched).

    Returns:
        The XSD tree with updated schema locations.
    """
    # build the tree
    xsd_tree = XSDTree.build_tree(xsd_string)

    # get the includes
    xsd_includes = xsd_tree.findall(
        "{}include".format(xml_utils_constants.LXML_SCHEMA_NAMESPACE)
    )
    # get the imports
    xsd_imports = xsd_tree.findall(
        "{}import".format(xml_utils_constants.LXML_SCHEMA_NAMESPACE)
    )

    # Single pass over the elements: each one is resolved from the location
    # it had on entry, so a rewritten location can never be matched again by
    # another key of ``dependencies``.
    for xsd_element in list(xsd_includes) + list(xsd_imports):
        # schemaLocation is optional on xs:import; skip instead of raising
        # a KeyError when it is absent
        schema_location = xsd_element.attrib.get("schemaLocation")
        if schema_location is None:
            continue

        dependency_id = dependencies.get(schema_location)
        if dependency_id is not None:
            xsd_element.attrib["schemaLocation"] = _get_schema_location_uri(
                dependency_id
            )

    return xsd_tree
def get_content_by_xpath(xml_string, xpath, namespaces=None):
    """Return the content found at an xpath as a list of strings.

    Args:
        xml_string: XML content to search.
        xpath: xpath expression to evaluate.
        namespaces: namespaces passed to the xpath evaluation.

    Returns:
        list: one entry per result; string results are kept as they are,
        any other result is serialized with ``XSDTree.tostring``.
    """
    tree = XSDTree.build_tree(xml_string)
    matches = tree.xpath(xpath, namespaces=namespaces)

    return [
        match if isinstance(match, str) else XSDTree.tostring(match)
        for match in matches
    ]
def test_target_namespace_with_prefix_returns_target_namespace_and_prefix(
        self):
    """A target namespace bound to a prefix is returned with that prefix."""
    xsd_string = "<schema targetNamespace='namespace' xmlns:ns='namespace'></schema>"
    xsd_tree = XSDTree.build_tree(xsd_string)
    namespaces = get_namespaces(xsd_string)
    # assertEqual: the assertEquals alias is deprecated and no longer exists
    # in recent unittest versions
    self.assertEqual(('namespace', 'ns'),
                     get_target_namespace(xsd_tree, namespaces))
def _update_attribute(xsd_string, xpath, attribute, value=None):
    """Set or delete an attribute of an XSD element.

    Args:
        xsd_string: XSD content.
        xpath: xpath of the element to update.
        attribute: name of the attribute to update.
        value: value to set; when None the attribute is deleted if present.

    Returns:
        The updated schema serialized with ``XSDTree.tostring``.
    """
    tree = XSDTree.build_tree(xsd_string)
    target = get_element_by_xpath(tree, xpath, get_namespaces(xsd_string))

    if value is None:
        # Deletion requested: nothing to do when the attribute is absent
        if attribute in target.attrib:
            del target.attrib[attribute]
    else:
        target.attrib[attribute] = value

    return XSDTree.tostring(tree)
def set_status(data, status, user):
    """Set the status of a data.

    The status is written to the ``status`` attribute of the root element
    of the XML content, and the data is then upserted.

    Args:
        data: data to update (``xml_content`` and ``workspace`` are read).
        status: status to set.
        user: user passed to ``data_api.upsert``.

    Returns:
        Result of ``data_api.upsert``.

    Raises:
        exceptions.ModelError: if the status is ``DataStatus.DELETED`` and
            the data has no workspace or a workspace that is not public.
    """
    if status == DataStatus.DELETED:
        not_published = (data.workspace is None
                         or data.workspace.is_public is False)
        if not_published:
            raise exceptions.ModelError(
                "the " + get_data_label() +
                " should be published if the targeted status is 'Deleted'")

    # Write the status on the root element and serialize the tree back
    tree = XSDTree.build_tree(data.xml_content)
    tree.getroot().attrib['status'] = status
    data.xml_content = XSDTree.tostring(tree)

    return data_api.upsert(data, user)
def delete_xsd_element(xsd_string, xpath):
    """Delete the element found at an xpath from a schema.

    Args:
        xsd_string: XSD content.
        xpath: xpath of the element to remove, written with the default
            prefix of the schema.

    Returns:
        The schema, serialized with ``XSDTree.tostring``, without the
        element.
    """
    tree = XSDTree.build_tree(xsd_string)
    prefix = get_default_prefix(get_namespaces(xsd_string))

    # The lookup uses the schema namespace constant instead of "prefix:"
    lookup_path = xpath.replace(prefix + ":", LXML_SCHEMA_NAMESPACE)

    target = tree.find(lookup_path)
    target.getparent().remove(target)

    return XSDTree.tostring(tree)
def is_well_formed_xml(xml_string):
    """Tell whether a string can be parsed as XML.

    Args:
        xml_string: content to check.

    Returns:
        bool: True if ``XSDTree.build_tree`` accepts the content, False if
        it raises any exception.
    """
    try:
        XSDTree.build_tree(xml_string)
    except Exception:
        parsed = False
    else:
        parsed = True
    return parsed
def rename_xsd_element(xsd_string, xpath, new_name):
    """Rename the element found at an xpath in a schema.

    Args:
        xsd_string: XSD content.
        xpath: xpath of the element to rename, written with the default
            prefix of the schema.
        new_name: value to store in the ``name`` attribute.

    Returns:
        The updated schema serialized with ``XSDTree.tostring``.
    """
    tree = XSDTree.build_tree(xsd_string)
    prefix = get_default_prefix(get_namespaces(xsd_string))

    # The lookup uses the schema namespace constant instead of "prefix:"
    lookup_path = xpath.replace(prefix + ":", LXML_SCHEMA_NAMESPACE)

    target = tree.find(lookup_path)
    target.attrib["name"] = new_name

    return XSDTree.tostring(tree)
def set_xsd_element_occurrences(xsd_string, xpath, min_occurs, max_occurs):
    """Set the occurrences of the element found at an xpath.

    Args:
        xsd_string: XSD content.
        xpath: xpath of the element to update, written with the default
            prefix of the schema.
        min_occurs: value stored in the ``minOccurs`` attribute.
        max_occurs: value stored in the ``maxOccurs`` attribute.

    Returns:
        The updated schema serialized with ``XSDTree.tostring``.
    """
    tree = XSDTree.build_tree(xsd_string)
    prefix = get_default_prefix(get_namespaces(xsd_string))

    # The lookup uses the schema namespace constant instead of "prefix:"
    lookup_path = xpath.replace(prefix + ":", LXML_SCHEMA_NAMESPACE)

    target = tree.find(lookup_path)
    target.attrib["minOccurs"] = min_occurs
    target.attrib["maxOccurs"] = max_occurs

    return XSDTree.tostring(tree)
def _update_appinfo_element(xsd_string, xpath, appinfo_name, value=None):
    """Set or delete an appinfo child of an XSD element.

    Args:
        xsd_string: XSD content.
        xpath: xpath of the element to update.
        appinfo_name: name of the appinfo to update.
        value: value handed to ``add_appinfo_child_to_element``; when None,
            ``delete_appinfo_child_from_element`` is called instead.

    Returns:
        The updated schema serialized with ``XSDTree.tostring``.
    """
    tree = XSDTree.build_tree(xsd_string)
    target = get_element_by_xpath(tree, xpath, get_namespaces(xsd_string))

    if value is None:
        delete_appinfo_child_from_element(target, appinfo_name)
    else:
        add_appinfo_child_to_element(target, appinfo_name, value)

    return XSDTree.tostring(tree)
def get_xsd_element_occurrences(xsd_string, xpath):
    """Get the min and max occurrences of the element found at an xpath.

    Args:
        xsd_string: XSD content.
        xpath: xpath of the element, written with the default prefix of
            the schema.

    Returns:
        tuple: (minOccurs, maxOccurs) attribute values; "1" is returned
        for an attribute that is not set on the element.
    """
    tree = XSDTree.build_tree(xsd_string)
    prefix = get_default_prefix(get_namespaces(xsd_string))

    # The lookup uses the schema namespace constant instead of "prefix:"
    lookup_path = xpath.replace(prefix + ":", LXML_SCHEMA_NAMESPACE)
    attributes = tree.find(lookup_path).attrib

    return attributes.get("minOccurs", "1"), attributes.get("maxOccurs", "1")
def test_output_without_method_html_default(self):
    """A stylesheet with an html root and no output method gives "html"."""
    stylesheet = (
        '<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0" xmlns:rsm="http://schema.nist.gov/xml/res-md/1.0wd-02-2017">'
        '<html lang="en"><head></head><body></body></html> </xsl:stylesheet>'
    )

    extension = XSDTree.get_extension(XSDTree.build_tree(stylesheet))

    self.assertEqual(extension, "html")
def test_output_without_method_xml_default(self):
    """An xsl:output element without a method attribute gives "xml"."""
    stylesheet = (
        '<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0" xmlns:rsm="http://schema.nist.gov/xml/res-md/1.0wd-02-2017">'
        '<xsl:output xsl="http://www.w3.org/1999/XSL/Transform" indent="yes" encoding="UTF-8" /> </xsl:stylesheet>'
    )

    extension = XSDTree.get_extension(XSDTree.build_tree(stylesheet))

    self.assertEqual(extension, "xml")
def test_get_element_xpath_matching_element_with_xs_namespace_prefix(self):
    """An xpath using the xs prefix of the schema finds the element."""
    xsd_string = "<xs:schema xmlns:xs='http://www.w3.org/2001/XMLSchema'><xs:element><xs:complexType>" \
                 "</xs:complexType></xs:element></xs:schema>"
    xpath = "xs:element/xs:complexType"

    xsd_tree = XSDTree.build_tree(xsd_string)
    namespaces = get_namespaces(xsd_string)
    element = get_element_by_xpath(xsd_tree, xpath, namespaces)

    # Dedicated assertion: states the intent and gives a clearer failure
    # message than assertTrue on a comparison
    self.assertIsNotNone(element)
def test_get_element_xpath_not_matching_element_without_namespace_prefix(
        self):
    """An xpath written without prefix raises XMLError on a prefixed schema."""
    schema = "<xsd:schema xmlns:xsd='http://www.w3.org/2001/XMLSchema'><xsd:element><xsd:complexType>" \
             "</xsd:complexType></xsd:element></xsd:schema>"
    unprefixed_xpath = "element/complexType"

    tree = XSDTree.build_tree(schema)
    schema_namespaces = get_namespaces(schema)

    with self.assertRaises(XMLError):
        get_element_by_xpath(tree, unprefixed_xpath, schema_namespaces)
def test_remove_no_annotations_returns_same_value(self):
    """A schema without annotation is left unchanged by remove_annotations."""
    xsd_string = '<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">' \
                 '<xs:element name="integer" type="xs:integer"/></xs:schema>'

    xsd_tree = XSDTree.build_tree(xsd_string)
    remove_annotations(xsd_tree)
    result_xsd_string = XSDTree.tostring(xsd_tree)

    # assertEqual reports both values on failure, unlike assertTrue(a == b)
    self.assertEqual(xsd_string, result_xsd_string)
def validate_form(request):
    """Validate data present in the form via XML validation.

    The curate data structure identified by ``request.POST["id"]`` is
    rendered to XML and validated against the content of its template.

    Args:
        request: HTTP request.

    Returns:
        HttpResponse whose body is a JSON object; it holds an ``errors``
        key when validation reported errors or an exception was raised,
        and is empty otherwise.
    """
    response_dict = {}
    try:
        # get curate data structure
        curate_data_structure_id = request.POST["id"]
        curate_data_structure = curate_data_structure_api.get_by_id(
            curate_data_structure_id, request.user)

        # generate the XML
        xml_data = render_xml(
            request, curate_data_structure.data_structure_element_root)

        # build trees
        template = template_api.get(str(curate_data_structure.template.id),
                                    request=request)
        xsd_tree = XSDTree.build_tree(template.content)
        xml_tree = XSDTree.build_tree(xml_data)

        # validate XML document
        errors = validate_xml_data(xsd_tree, xml_tree, request=request)

        if errors is not None:
            response_dict["errors"] = errors
    except XMLSyntaxError as xml_syntax_error:
        response_dict["errors"] = (
            "Your XML data is not well formatted. " + str(xml_syntax_error))
    except Exception as e:
        # str() never returns None, so the generic message must be selected
        # on an empty message for the fallback to be reachable.
        message = (str(e).replace('"', "'")
                   or "The current document cannot be validated.")
        response_dict["errors"] = message

    return HttpResponse(json.dumps(response_dict),
                        content_type="application/javascript")
def test_remove_annotations_returns_tree_without_annotations(self):
    """remove_annotations drops the annotation and keeps the element."""
    xsd_string = '<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">' \
                 '<xs:annotation><xs:appinfo/></xs:annotation>' \
                 '<xs:element name="integer" type="xs:integer"/></xs:schema>'
    expected_xsd_string = '<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">' \
                          '<xs:element name="integer" type="xs:integer"/></xs:schema>'

    xsd_tree = XSDTree.build_tree(xsd_string)
    remove_annotations(xsd_tree)
    result_xsd_string = XSDTree.tostring(xsd_tree)

    # assertEqual reports both values on failure, unlike assertTrue(a == b)
    self.assertEqual(expected_xsd_string, result_xsd_string)
def xsl_transform(xml_string, xslt_string):
    """Apply an XSLT transformation to an XML string.

    Args:
        xml_string: XML content to transform.
        xslt_string: XSLT content describing the transformation.

    Returns:
        str: string form of the transformed tree.

    Raises:
        exceptions.CoreError: if anything fails while parsing the inputs
            or applying the transformation.
    """
    try:
        # The stylesheet is parsed before the document
        stylesheet_tree = XSDTree.build_tree(xslt_string)
        document_tree = XSDTree.build_tree(xml_string)

        apply_stylesheet = XSDTree.transform_to_xslt(stylesheet_tree)
        return str(apply_stylesheet(document_tree))
    except Exception:
        raise exceptions.CoreError(
            "An unexpected exception happened while transforming the XML")
def save_template(request):
    """Save the current template in the database.

    Reads ``templateName`` from the POST data, and the schema being
    composed (``newXmlTemplateCompose``) and the included types
    (``includedTypesCompose``) from the session.

    Args:
        request: HTTP request.

    Returns:
        On the failure paths visible here: the result of
        ``_error_response`` (invalid schema, unexpected exception) or an
        ``HttpResponseBadRequest`` (template name already used).
    """
    try:
        template_name = request.POST['templateName']
        xsd_string = request.session['newXmlTemplateCompose']

        response_dict = {}

        try:
            # Build XSD tree
            xsd_tree = XSDTree.build_tree(xsd_string)

            # validate the schema
            error = main_xml_utils.validate_xml_schema(xsd_tree)

            if error is not None:
                return _error_response('This is not a valid XML schema. ' +
                                       error)
        except Exception as e:
            # str(e) is available on every exception; the ``message``
            # attribute no longer exists on Python 3 exceptions.
            return _error_response('This is not a valid XML schema. ' +
                                   str(e))

        # get list of dependencies
        dependencies = _get_dependencies_ids(
            request.session["includedTypesCompose"])

        try:
            # create template version manager
            template_version_manager = TemplateVersionManager(
                title=template_name, user=str(request.user.id))
            # create template
            template = Template(filename=template_name,
                                content=xsd_string,
                                dependencies=dependencies)
            # save template in database
            template_version_manager_api.insert(template_version_manager,
                                                template)
        except exceptions.NotUniqueError:
            return HttpResponseBadRequest(
                "A template with the same name already exists. Please choose another name."
            )
        # NOTE(review): no success response is returned in the code visible
        # here although response_dict is prepared - presumably an HTTP
        # response wrapping it should follow; confirm against the full view.
    except Exception as e:
        # NOTE(review): this handler is attached to the outer ``try``, the
        # only placement that keeps the visible code well-formed - confirm.
        return _error_response(str(e))
def format_content_xml(xml_string):
    """Pretty print an XML string.

    Args:
        xml_string: XML content to format.

    Returns:
        Output of ``XSDTree.tostring`` called with ``pretty=True``.

    Raises:
        exceptions.XMLError: if parsing or serializing the content fails.
    """
    try:
        return XSDTree.tostring(XSDTree.build_tree(xml_string), pretty=True)
    except Exception:
        raise exceptions.XMLError("Content is not well formatted XML.")
def get_flat(self):
    """Return the flattened file.

    ``self.xml_string`` is parsed with a parser that drops blank text,
    comments and processing instructions, then handed to
    ``_replace_all_includes_by_content``.

    Returns:
        Result of ``self._replace_all_includes_by_content`` on the parsed
        tree.
    """
    stripping_parser = etree.XMLParser(remove_comments=True,
                                       remove_pis=True,
                                       remove_blank_text=True)
    parsed_tree = XSDTree.build_tree(self.xml_string, parser=stripping_parser)
    return self._replace_all_includes_by_content(parsed_tree)
def check_type_core_support(xsd_string):
    """Check that a type has a format supported by the Core.

    The children of the root may only be simpleType, complexType or
    include elements, and at most one of them may be a type definition.

    Args:
        xsd_string: XSD content of the type.

    Returns:
        ``COMPLEX_TYPE`` or ``SIMPLE_TYPE`` depending on the type
        definition found; an empty string when no child is a type
        definition.

    Raises:
        XMLError: if the content is not well formed XML.
        CoreError: if the root has no child, a child with an unsupported
            tag, or more than one type definition.
    """
    unsupported_message = (
        "A type should be a valid XML schema containing only one type definition "
        "(Allowed tags are: simpleType or complexType and include).")

    # check that well formatted first
    if not is_well_formed_xml(xsd_string):
        raise XMLError("Uploaded file is not well formatted XML.")

    children = XSDTree.build_tree(xsd_string).findall("*")
    if len(children) == 0:
        raise CoreError(unsupported_message)

    # only simpleType, complexType or include
    allowed_kinds = ("complexType", "simpleType", "include")
    for child in children:
        if not any(kind in child.tag for kind in allowed_kinds):
            raise CoreError(unsupported_message)

    # only one type
    type_tags = [
        child.tag for child in children
        if "complexType" in child.tag or "simpleType" in child.tag
    ]
    if len(type_tags) > 1:
        raise CoreError(unsupported_message)
    if not type_tags:
        # Only includes were found: no type definition to report
        return ""

    return COMPLEX_TYPE if "complexType" in type_tags[0] else SIMPLE_TYPE