def validate_xsd(xml_text: Union[bytes, str], xsd_file=None, xsd_content=None) -> etree._Element:
    """Parse ``xml_text`` and validate it against an XSD schema.

    The schema object is obtained from
    ``compile_xsd(xsd_file=xsd_file, xsd_content=xsd_content)``.

    :param xml_text: the XML document, either as text or as encoded bytes
    :param xsd_file: forwarded unchanged to ``compile_xsd``
    :param xsd_content: forwarded unchanged to ``compile_xsd``
    :returns: the root element of the parsed document
    :raises etree.DocumentInvalid: if the document cannot be parsed or does
        not validate; the message quotes the offending source line when it
        can be located
    """
    xml_schema = compile_xsd(xsd_file=xsd_file, xsd_content=xsd_content)
    if isinstance(xml_text, str):
        xml_str = xml_text
        xml_bytes = xml_text.encode()
    else:
        xml_bytes = xml_text
        # The text form is only used to quote lines in error messages, so
        # bytes that are not valid UTF-8 must not abort the validation.
        xml_str = xml_text.decode(errors="replace")

    try:
        xml_doc = etree.fromstring(xml_bytes)
    except etree.XMLSyntaxError as err:
        source_lines = xml_str.splitlines()
        raise etree.DocumentInvalid(
            f"{err.text} at {err.lineno}:{err.offset + 1} "
            f"in {err.filename} (source: {_quote_source_line(source_lines, err.lineno)})"
        ) from err

    if not xml_schema.validate(xml_doc):
        # Improve error message display, to ease debugging of XML data
        source_lines = xml_str.splitlines()
        raise etree.DocumentInvalid("\n".join([
            f"{err.message} at {err.line}:{err.column} "
            f"in {err.path} (source: {_quote_source_line(source_lines, err.line)})"
            for err in xml_schema.error_log
        ]))
    return xml_doc


def _quote_source_line(source_lines, lineno):
    """Return the stripped 1-based line ``lineno`` or a placeholder if it is out of range."""
    # A missing/zero/too-large line number must not raise IndexError (which
    # would hide the real validation error) nor wrap around to the last line.
    if lineno and 0 < lineno <= len(source_lines):
        return source_lines[lineno - 1].strip()
    return "<unavailable>"
def validate_xml(xmltree, schema, error_header='File does not validate'):
    """
    Validate an xmltree against a schema and, on failure, raise a single error
    that summarises every distinct validation message.

    Identical messages are merged into one entry that names the first line and
    lists the remaining lines; entries are ordered by their first line.

    :param xmltree: xmltree of the file to validate (passed through ``clear_xml`` first)
    :param schema: etree.XMLSchema to validate against
    :param error_header: str placed in front of the collected error messages

    :raises: etree.DocumentInvalid if the tree does not validate against the schema
    """
    from itertools import groupby

    def by_message(entry):
        return entry.message

    try:
        cleared_tree, _ = clear_xml(xmltree)
        schema.assertValid(cleared_tree)
    except etree.DocumentInvalid as exc:
        # groupby only merges adjacent items, hence the sort by the same key
        entries_by_message = groupby(sorted(schema.error_log, key=by_message), key=by_message)

        reports = []
        for message, group in entries_by_message:
            line_numbers = [entry.line for entry in group]
            report = f'Line {line_numbers[0]}: {message}'
            if len(line_numbers) > 1:
                report += (
                    f"; This error also occured on the lines {', '.join(str(number) for number in line_numbers[1:])}"
                )
            reports.append((line_numbers[0], f'{report} \n'))

        # Present the entries in document order of their first occurrence
        summary = ''.join(report for _, report in sorted(reports))
        raise etree.DocumentInvalid(f"{error_header}: \n{summary}") from exc
def test_refuse_invalid_document(self, mock_validate):
    """Building a ClusterState must report BAD_CLUSTER_STATE_FORMAT when validation fails.

    ``mock_validate`` is made to raise ``etree.DocumentInvalid``; it is
    presumably the patched validation routine injected by a decorator that is
    not visible here.
    """
    mock_validate.side_effect = etree.DocumentInvalid("some error")
    self.covered_status.append_to_first_tag_name(
        "nodes",
        '<node without="required attributes" />',
    )
    expected_report = (
        severities.ERROR,
        report_codes.BAD_CLUSTER_STATE_FORMAT,
        {},
    )

    def build_cluster_state():
        return ClusterState(str(self.covered_status))

    assert_raise_library_error(build_cluster_state, expected_report)
def parse(self, register):
    """Validate ``register``, run the debug hook and return its parsed form.

    :param register: the register object handed to ``_validate``,
        ``_debug_parser`` and ``_get_parsed_register``
    :returns: whatever ``self._get_parsed_register(register)`` returns
    :raises NotImplementedError: if one of the steps raises it
    :raises etree.DocumentInvalid: if one of the steps raises it
    """
    try:
        self._validate(register)
        self._debug_parser(register)
        return self._get_parsed_register(register)
    except NotImplementedError as e:
        # ``NotImplemented`` is a non-callable constant; calling it raised a
        # TypeError instead of the intended exception.
        raise NotImplementedError(e) from e
    except etree.DocumentInvalid as e:
        raise etree.DocumentInvalid(e) from e
def validate_xml_against_schema(xml_str, xml_schema_file):
    """
    Check an XML string against the XSD stored in a file.

    The string is parsed once for validation and, if it validates, parsed a
    second time with ``etree.parse`` to build the returned tree.

    :param xml_str str - the XML document to validate
    :param xml_schema_file str - valid file path to the XSD file
    :returns the tree obtained from ``etree.parse`` on ``xml_str``
    :raises etree.DocumentInvalid if the document does not validate
    """
    with open(xml_schema_file, 'rb') as xsd_handle:
        xsd_root = etree.XML(xsd_handle.read())
    validator = etree.XMLSchema(xsd_root)

    document_root = etree.fromstring(xml_str)
    if not validator.validate(document_root):
        raise etree.DocumentInvalid("Retrieved file does not validate against ISA configuration xsd")
    return etree.parse(StringIO(xml_str))
def modify_fleurinpdata(original, modifications, **kwargs):
    """
    A CalcFunction that performs the modification of the given FleurinpData and returns
    the modified copy.

    :param original: a FleurinpData to be modified
    :param modifications: a node whose ``get_dict()['tasks']`` holds the list of
                          modification tasks
    :param kwargs: dict of other aiida nodes to be linked to the modifications
                   (not used inside the function body)
    :returns new_fleurinp: a modified clone of ``original``
    :raises etree.DocumentInvalid: if the stored inp.xml does not validate against the schema
    """
    # Steps: copy -> get schema -> read in inp.xml -> add modifications ->
    # validate -> save inp.xml -> return new fleurinp (copy)
    from aiida_fleur.tools.xml_util import clear_xml

    new_fleurinp = original.clone()
    modification_tasks = modifications.get_dict()['tasks']

    xmlschema_doc = etree.parse(new_fleurinp._schema_file_path)
    xmlschema = etree.XMLSchema(xmlschema_doc)
    parser = etree.XMLParser(attribute_defaults=True, remove_blank_text=True)
    with new_fleurinp.open(path='inp.xml', mode='r') as inpxmlfile:
        tree = etree.parse(inpxmlfile, parser)
        # Remember where the file lives; the handle is closed on leaving the block
        inpxml_path = inpxmlfile.name

    try:
        xmlschema.assertValid(clear_xml(tree))
    except etree.DocumentInvalid as exc:
        msg = 'Input file is not validated against the schema'
        print(msg)
        raise etree.DocumentInvalid(msg) from exc

    try:
        with new_fleurinp.open(path='n_mmp_mat', mode='r') as n_mmp_file:
            nmmplines = n_mmp_file.read().split('\n')
    except FileNotFoundError:
        # The density matrix file is optional
        nmmplines = None

    new_fleurtree, new_nmmplines = FleurinpModifier.apply_modifications(
        fleurinp_tree_copy=tree,
        nmmp_lines_copy=nmmplines,
        modification_tasks=modification_tasks,
    )

    # To include object store storage this prob has to be done differently
    inpxmlfile_new = inpxml_path.replace('inp.xml', 'temp_inp.xml')
    new_fleurtree.write(inpxmlfile_new, pretty_print=True)
    try:
        new_fleurinp.del_file('inp.xml')
        new_fleurinp._add_path(str(inpxmlfile_new), 'inp.xml')
    finally:
        # Never leave the temporary file behind, even if the swap fails
        os.remove(inpxmlfile_new)

    if new_nmmplines:
        new_nmmp = bytes('\n'.join(new_nmmplines), 'utf-8')
        new_fleurinp._add_path(io.BytesIO(new_nmmp), 'n_mmp_mat')

    # default label and description
    new_fleurinp.label = 'mod_fleurinp'
    new_fleurinp.description = 'Fleurinpdata with modifications (see inputs of modify_fleurinpdata)'

    return new_fleurinp
def apply_modifications(fleurinp_tree_copy, nmmp_lines_copy, modification_tasks, schema_tree=None):
    """
    Applies given modifications to the fleurinp lxml tree and the n_mmp_mat lines.

    Each task is a sequence whose first item names the action and whose
    remaining items are passed positionally to that action. After all tasks
    have run, the tree is validated against the schema (only if ``schema_tree``
    is given) and the n_mmp_mat lines are validated against the tree.

    :param fleurinp_tree_copy: a fleurinp lxml tree to be modified
    :param nmmp_lines_copy: lines of a n_mmp_mat file to be modified
    :param modification_tasks: a list of modification tuples
    :param schema_tree: optional lxml tree of the schema; when ``None`` the
                        schema validation is skipped

    :returns: a modified fleurinp lxml tree and a modified n_mmp_mat file
    :raises ValueError: for an unknown task name, or if ``validate_nmmpmat``
                        rejects the result
    :raises etree.DocumentInvalid: if the modified tree does not validate
                                   against the given schema
    """
    from aiida_fleur.tools.xml_util import xml_set_attribv_occ, xml_set_first_attribv
    from aiida_fleur.tools.xml_util import xml_set_all_attribv, xml_set_text
    from aiida_fleur.tools.xml_util import xml_set_text_occ, xml_set_all_text
    from aiida_fleur.tools.xml_util import create_tag, replace_tag, delete_tag
    from aiida_fleur.tools.xml_util import delete_att, set_species
    from aiida_fleur.tools.xml_util import change_atomgr_att, add_num_to_att
    from aiida_fleur.tools.xml_util import change_atomgr_att_label, set_species_label
    from aiida_fleur.tools.xml_util import set_inpchanges, set_nkpts, set_kpath, shift_value
    from aiida_fleur.tools.xml_util import shift_value_species_label
    from aiida_fleur.tools.xml_util import clear_xml
    from aiida_fleur.tools.set_nmmpmat import set_nmmpmat, validate_nmmpmat

    # The wrappers below give every action the same shape: take the tree
    # first and return the tree (or, for set_nmmpmat, the nmmp lines).

    def xml_set_attribv_occ1(fleurinp_tree_copy, xpathn, attributename, attribv, occ=None, create=False):
        if occ is None:
            occ = [0]
        xml_set_attribv_occ(fleurinp_tree_copy, xpathn, attributename, attribv, occ=occ, create=create)
        return fleurinp_tree_copy

    def xml_set_first_attribv1(fleurinp_tree_copy, xpathn, attributename, attribv, create=False):
        xml_set_first_attribv(fleurinp_tree_copy, xpathn, attributename, attribv, create=create)
        return fleurinp_tree_copy

    def xml_set_all_attribv1(fleurinp_tree_copy, xpathn, attributename, attribv, create=False):
        xml_set_all_attribv(fleurinp_tree_copy, xpathn, attributename, attribv, create=create)
        return fleurinp_tree_copy

    def xml_set_text1(fleurinp_tree_copy, xpathn, text, create=False):
        xml_set_text(fleurinp_tree_copy, xpathn, text, create=create)
        return fleurinp_tree_copy

    def xml_set_text_occ1(fleurinp_tree_copy, xpathn, text, create=False, occ=0):
        xml_set_text_occ(fleurinp_tree_copy, xpathn, text, create=create, occ=occ)
        return fleurinp_tree_copy

    def xml_set_all_text1(fleurinp_tree_copy, xpathn, text, create=False):
        xml_set_all_text(fleurinp_tree_copy, xpathn, text, create=create)
        return fleurinp_tree_copy

    def create_tag1(fleurinp_tree_copy, xpath, newelement, create=False):
        return create_tag(fleurinp_tree_copy, xpath, newelement, create=create)

    def delete_att1(fleurinp_tree_copy, xpath, attrib):
        return delete_att(fleurinp_tree_copy, xpath, attrib)

    def delete_tag1(fleurinp_tree_copy, xpath):
        return delete_tag(fleurinp_tree_copy, xpath)

    def replace_tag1(fleurinp_tree_copy, xpath, newelement):
        return replace_tag(fleurinp_tree_copy, xpath, newelement)

    def set_species1(fleurinp_tree_copy, species_name, attributedict, create=False):
        return set_species(fleurinp_tree_copy, species_name, attributedict, create=create)

    def set_species2(fleurinp_tree_copy, at_label, attributedict, create=False):
        return set_species_label(fleurinp_tree_copy, at_label, attributedict, create=create)

    # NOTE: ``create`` is accepted but not forwarded by the two wrappers below
    def change_atomgr_att1(fleurinp_tree_copy, attributedict, position=None, species=None, create=False):
        return change_atomgr_att(fleurinp_tree_copy, attributedict, position=position, species=species)

    def change_atomgr_att2(fleurinp_tree_copy, attributedict, atom_label, create=False):
        return change_atomgr_att_label(fleurinp_tree_copy, attributedict, at_label=atom_label)

    def add_num_to_att1(fleurinp_tree_copy, xpathn, attributename, set_val, mode='abs', occ=None):
        if occ is None:
            occ = [0]
        return add_num_to_att(fleurinp_tree_copy, xpathn, attributename, set_val, mode=mode, occ=occ)

    def set_inpchanges1(fleurinp_tree_copy, change_dict):
        return set_inpchanges(fleurinp_tree_copy, change_dict)

    def shift_value1(fleurinp_tree_copy, change_dict, mode):
        return shift_value(fleurinp_tree_copy, change_dict, mode)

    def shift_value_species_label1(fleurinp_tree_copy, label, att_name, value, mode):
        return shift_value_species_label(fleurinp_tree_copy, label, att_name, value, mode)

    def set_nkpts1(fleurinp_tree_copy, count, gamma):
        return set_nkpts(fleurinp_tree_copy, count, gamma)

    def set_kpath1(fleurinp_tree_copy, kpath, count, gamma):
        return set_kpath(fleurinp_tree_copy, kpath, count, gamma)

    def set_kpointsdata1(fleurinp_tree_copy, kpointsdata_uuid):
        # set_kpointsdata_f is not imported here; it is expected at module level
        return set_kpointsdata_f(fleurinp_tree_copy, kpointsdata_uuid)

    def set_nmmpmat1(fleurinp_tree_copy, nmmp_lines_copy, species_name, orbital,
                     spin, occStates, denmat, phi, theta):
        return set_nmmpmat(fleurinp_tree_copy, nmmp_lines_copy, species_name, orbital,
                           spin, occStates, denmat, phi, theta)

    actions = {
        'xml_set_attribv_occ': xml_set_attribv_occ1,
        'xml_set_first_attribv': xml_set_first_attribv1,
        'xml_set_all_attribv': xml_set_all_attribv1,
        'xml_set_text': xml_set_text1,
        'xml_set_text_occ': xml_set_text_occ1,
        'xml_set_all_text': xml_set_all_text1,
        'create_tag': create_tag1,
        'replace_tag': replace_tag1,
        'delete_tag': delete_tag1,
        'delete_att': delete_att1,
        'set_species': set_species1,
        'set_species_label': set_species2,
        'set_atomgr_att': change_atomgr_att1,
        'set_atomgr_att_label': change_atomgr_att2,
        'set_inpchanges': set_inpchanges1,
        'shift_value': shift_value1,
        'shift_value_species_label': shift_value_species_label1,
        'set_nkpts': set_nkpts1,
        'set_kpath': set_kpath1,
        'set_kpointsdata': set_kpointsdata1,
        'add_num_to_att': add_num_to_att1,
        'set_nmmpmat': set_nmmpmat1,
    }

    workingtree = fleurinp_tree_copy
    workingnmmp = nmmp_lines_copy

    # Compare against None explicitly: truth-testing lxml objects is
    # deprecated for elements and depends on their number of children.
    validate_schema = schema_tree is not None
    if validate_schema:
        xmlschema = etree.XMLSchema(schema_tree)

    for task in modification_tasks:
        try:
            action = actions[task[0]]
        except KeyError as exc:
            raise ValueError('Unknown task {}'.format(task[0])) from exc

        if task[0] == 'set_nmmpmat':
            # The only action that changes the nmmp lines instead of the tree
            workingnmmp = action(workingtree, workingnmmp, *task[1:])
        else:
            workingtree = action(workingtree, *task[1:])

    if validate_schema:
        try:
            xmlschema.assertValid(clear_xml(workingtree))
        except etree.DocumentInvalid as exc:
            msg = 'Changes were not valid: {}'.format(modification_tasks)
            print(msg)
            raise etree.DocumentInvalid(msg) from exc

    try:
        validate_nmmpmat(workingtree, workingnmmp)
    except ValueError as exc:
        msg = 'Changes were not valid (n_mmp_mat file is not compatible): {}'.format(
            modification_tasks)
        print(msg)
        raise ValueError(msg) from exc

    return workingtree, workingnmmp