def validate_gml_data(root_path, pattern, schema_file, tl):
    """
    This function validates the GML data file against the XML schema and performs additional
    consistency checks. If schema_file is None, the GML data file is validated against the
    XML schema files specified by its 'schemaLocation' attribute.

    @type root_path: string
    @param root_path: Root directory
    @type pattern: string
    @param pattern: Pattern used to locate the GML data files
    @type schema_file: string
    @param schema_file: Path to the XML schema file (None to use 'schemaLocation')
    @type tl: workers.TaskLogger
    @param tl: workers.TaskLogger
    @rtype: bool
    @return: Validity of GML data
    """
    ns = {'ogr': 'http://ogr.maptools.org/',
          'gml': 'http://www.opengis.net/gml',
          'xsi': 'http://www.w3.org/2001/XMLSchema-instance'}
    xmlval = XmlValidation()
    gml_data_files = [x for x in locate(pattern, root_path)]
    for gml in gml_data_files:
        logger.debug("Validating GML data file: %s" % strip_prefixes(gml, root_path))
        # validate against xml schema
        result = xmlval.validate_XML_by_path(gml, schema_file)
        if schema_file is None:
            tl.addinfo("Using schema files specified by the 'schemaLocation' attribute")
        else:
            tl.addinfo("Using schema: %s" % schema_file)
        if result.valid:
            logger.debug("GML data file '%s' successfully validated." % gml)
        else:
            if schema_file is None:
                tl.adderr("Error validating against schemas using schema files specified by the "
                          "'schemaLocation' attribute:")
            else:
                tl.adderr("Error validating against schema '%s': %s" % (schema_file, result.err))
            for err in result.err:
                tl.adderr("- %s" % str(err))
            return False
    return True
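# --- Usage sketch (illustrative only) ----------------------------------------
# A minimal, hypothetical example of calling validate_gml_data. The working
# directory path is made up, and _StubTaskLogger is a stand-in for
# workers.TaskLogger that only provides the addinfo/adderr methods used above.
class _StubTaskLogger(object):
    def __init__(self):
        self.infos, self.errors = [], []
    def addinfo(self, msg):
        self.infos.append(msg)
    def adderr(self, msg, display=True):
        self.errors.append(msg)

if __name__ == '__main__':
    tl = _StubTaskLogger()
    # Passing schema_file=None validates each GML file against the schemas
    # referenced by its own 'schemaLocation' attribute.
    print "GML data valid: %s" % validate_gml_data("/var/data/ip_work/abc123", "*.gml", None, tl)
    for e in tl.errors:
        print "ERROR: %s" % e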
def getFileElements(self, deliveryDir, delivery_xml_file, schema_file):
    log = []
    log.append("Validating delivery: %s using schema: %s" % (delivery_xml_file, schema_file))
    try:
        # Parse the XML file, get the root element
        parsed_mets = ParsedMets(deliveryDir)
        parsed_mets.load_mets(delivery_xml_file)
        # If the XSD file wasn't found, extract its location from the XML
        if schema_file is None:
            schema_file = parsed_mets.get_mets_schema_from_schema_location()
        # Parse the XSD file
        parsed_sfile = lxml.etree.parse(schema_file)
        # Validate the delivery XML file
        xmlVal = XmlValidation()
        validation_result = xmlVal.validate_XML(parsed_mets.mets_tree, parsed_sfile)
        if validation_result:
            return parsed_mets.get_file_elements()
    except (XMLSyntaxError), why:
        errmsg = 'Error validating delivery %s, why: %s' % (delivery_xml_file, str(why))
        log.append(errmsg)
    return None
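# --- Usage sketch (illustrative only) ----------------------------------------
# Hypothetical call to getFileElements; 'checker' stands for whatever object
# exposes the method, and the delivery directory/file names are assumptions.
import os
import lxml.etree

delivery_dir = "/var/data/reception/delivery-0001"
file_elements = checker.getFileElements(delivery_dir,
                                        os.path.join(delivery_dir, "delivery.xml"),
                                        None)  # None: derive the XSD from 'schemaLocation'
if file_elements is not None:
    for file_element in file_elements:
        print lxml.etree.tostring(file_element, pretty_print=True)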
def test_log(self):
    self.test_root = os.path.split(os.path.abspath(os.path.dirname(__file__)))[0]
    # Build a PREMIS document from the skeleton and add an agent, an event and an object
    with open(os.path.join(self.test_root, '../../earkresources/PREMIS_skeleton.xml'), 'r') as premis_file:
        self.my_premis = Premis(premis_file)
    self.my_premis.add_agent('Aip2Dip')
    self.my_premis.add_event('Migration01', 'success', 'Aip2Dip')
    self.my_premis.add_object('file.txt')
    premis_xml = pretty_xml_string(self.my_premis.to_string())
    print premis_xml
    # Validate the generated PREMIS document against the PREMIS 2.2 schema
    xmlval = XmlValidation()
    parsed_xml = etree.fromstring(premis_xml)
    parsed_schema = etree.parse('../../../earkresources/schemas/premis-v2-2.xsd')
    validation_result = xmlval.validate_XML(parsed_xml, parsed_schema)
    if len(validation_result.err) > 0:
        print validation_result.err
    self.assertTrue(validation_result.valid)
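# --- Usage sketch (illustrative only) ----------------------------------------
# Running just this test case with the standard unittest runner; the module
# and class names ('test_premis', 'PremisTest') are assumptions.
import unittest

suite = unittest.TestLoader().loadTestsFromName('test_premis.PremisTest.test_log')
unittest.TextTestRunner(verbosity=2).run(suite)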
def submit_order(request):
    print 'received request: ' + request.method
    validator = XmlValidation()
    # { "order_title" : "example title", "aip_identifiers" : [ "b7738768-032d-3db1-eb42-b09611e6e6c6", "916c659c-909d-ad94-2289-c7ee8e7482d9"]}
    if request.method == 'POST':
        order_json = json.loads(request.body)
        if "order_title" not in order_json:
            response = {'process_id': None, 'error': "Missing order_title element in order request."}
            return HttpResponse(json.dumps(response))
        if "aip_identifiers" not in order_json:
            response = {'process_id': None, 'error': "Missing aip_identifiers element in order request."}
            return HttpResponse(json.dumps(response))
        order_title = order_json["order_title"]
        aip_identifiers = order_json["aip_identifiers"]
        # Verify that all necessary AIPs exist; return an error otherwise
        for aip_identifier in aip_identifiers:
            if InformationPackage.objects.filter(identifier=aip_identifier).count() == 0:
                response = {'process_id': None, 'error': "Unknown IP for provided UUID %s" % aip_identifier}
                return HttpResponse(json.dumps(response))
        try:
            dip = DIP.objects.create(name=order_title)
        except Exception as e:
            response = {'process_id': None, 'error': repr(e)}
            return HttpResponse(json.dumps(response))
        process_id = getUniqueID()
        wf = WorkflowModules.objects.get(identifier=AIPtoDIPReset.__name__)
        InformationPackage.objects.create(path=os.path.join(config_path_work, process_id),
                                          uuid=process_id, statusprocess=0,
                                          packagename=order_title, last_task=wf)
        print "Created DIP with UUID %s" % process_id
        for aip_identifier in aip_identifiers:
            # If the entry does not exist in search_aip, add it from earkweb_informationpackage
            if AIP.objects.filter(identifier=aip_identifier).count() == 0:
                ip = InformationPackage.objects.get(identifier=aip_identifier)
                aip = AIP.objects.create(identifier=aip_identifier, cleanid="",
                                         source=ip.storage_loc, date_selected=timezone.now())
            aip = AIP.objects.get(identifier=aip_identifier)
            Inclusion(aip=aip, dip=dip).save()
            print "Added existing package %s" % aip_identifier
        response = {"process_id": process_id, 'status': 'Submitted'}
        return HttpResponse(json.dumps(response))
    else:
        response = {'process_id': None, 'error': "Unsupported GET request."}
        return HttpResponse(json.dumps(response))
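# --- Client-side sketch (illustrative only) -----------------------------------
# Posting an order to this view with the 'requests' library. The endpoint URL
# is an assumption; the JSON body mirrors the example in the comment above.
import json
import requests

order = {
    "order_title": "example title",
    "aip_identifiers": ["b7738768-032d-3db1-eb42-b09611e6e6c6",
                        "916c659c-909d-ad94-2289-c7ee8e7482d9"],
}
resp = requests.post("http://localhost:8000/search/submit_order",
                     data=json.dumps(order),
                     headers={"Content-Type": "application/json"})
print resp.json()  # e.g. {"process_id": "...", "status": "Submitted"} or an error message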
def validate_delivery(self, deliveryDir, delivery_xml_file, schema_file, package_file):
    """
    Validate the delivery METS document. Performs XML validation of the delivery METS file
    and a fixity check on file level.

    @type deliveryDir: string
    @param deliveryDir: Path to delivery directory
    @type delivery_xml_file: string
    @param delivery_xml_file: Path to delivery METS file.
    @type schema_file: string
    @param schema_file: Path to METS schema file (None to derive it from 'schemaLocation').
    @type package_file: string
    @param package_file: Path to package file (e.g. TAR).
    @rtype: ValidationResult
    @return: Validation result (validity, process log, error log)
    """
    valid = False
    log = []
    err = []
    valid_xml = False
    valid_checksum = False
    log.append("Validating delivery: %s using schema: %s and package file %s"
               % (delivery_xml_file, schema_file, package_file))
    try:
        # Parse the XML file, get the root element
        parsed_mets = ParsedMets(deliveryDir)
        parsed_mets.load_mets(delivery_xml_file)
        # If the XSD file wasn't found, extract its location from the XML
        if schema_file is None:
            schema_file = parsed_mets.get_mets_schema_from_schema_location()
        # Parse the XSD file
        parsed_sfile = lxml.etree.parse(schema_file)
        # Validate the delivery XML file
        xmlVal = XmlValidation()
        validation_result = xmlVal.validate_XML(parsed_mets.mets_tree, parsed_sfile)
        valid_xml = validation_result
        # Checksum validation
        checksum_expected = ParsedMets.get_file_element_checksum(parsed_mets.get_first_file_element())
        checksum_algorithm = ParsedMets.get_file_element_checksum_algorithm(parsed_mets.get_first_file_element())
        csval = ChecksumValidation()
        valid_checksum = csval.validate_checksum(package_file, checksum_expected,
                                                 ChecksumAlgorithm.get(checksum_algorithm))
        # Mets validation
        mval = MetsValidation(parsed_mets)
        valid_files_size = mval.validate_files_size()
        log += validation_result.log
        err += validation_result.err
        log += valid_files_size.log
        err += valid_files_size.err
        log.append("Checksum validity: \"%s\"" % str(valid_checksum))
        valid = (valid_xml.valid and valid_checksum and valid_files_size.valid)
        return ValidationResult(valid, log, err)
    except (XMLSyntaxError), why:
        errmsg = 'Error validating delivery %s, why: %s' % (delivery_xml_file, str(why))
        err.append(errmsg)
        return ValidationResult(False, log, err)
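# --- Usage sketch (illustrative only) ----------------------------------------
# Hypothetical call to validate_delivery; 'sip_validation' stands for the
# object exposing the method, and the directory/file names are assumptions.
import os

delivery_dir = "/var/data/reception/delivery-0001"
result = sip_validation.validate_delivery(delivery_dir,
                                          os.path.join(delivery_dir, "delivery.xml"),
                                          None,  # derive the METS schema from 'schemaLocation'
                                          os.path.join(delivery_dir, "package.tar"))
print "Delivery valid: %s" % result.valid
for line in result.err:
    print "ERROR: %s" % line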
def validate_ead_metadata(root_path, pattern, schema_file, tl):
    """
    This function validates the EAD metadata file against the XML schema and performs additional
    consistency checks. If schema_file is None, the EAD metadata file is validated against the
    XML schema files specified by its 'schemaLocation' attribute.

    @type root_path: string
    @param root_path: Root directory
    @type pattern: string
    @param pattern: Pattern used to locate the EAD metadata files
    @type schema_file: string
    @param schema_file: Path to the XML schema file (None to use 'schemaLocation')
    @type tl: workers.TaskLogger
    @param tl: workers.TaskLogger
    @rtype: bool
    @return: Validity of EAD metadata
    """
    # ead 2002: ns = {'ead': 'http://ead3.archivists.org/schema/', 'xlink': 'http://www.w3.org/1999/xlink', 'xsi': 'http://www.w3.org/2001/XMLSchema-instance'}
    ns = {'ead': 'http://ead3.archivists.org/schema/',
          'xsi': 'http://www.w3.org/2001/XMLSchema-instance'}
    xmlval = XmlValidation()
    ead_md_files = [x for x in locate(pattern, root_path)]
    for ead in ead_md_files:
        logger.debug("Validating EAD metadata file: %s" % strip_prefixes(ead, root_path))
        # validate against xml schema
        result = xmlval.validate_XML_by_path(ead, schema_file)
        if schema_file is None:
            tl.addinfo("Using schema files specified by the 'schemaLocation' attribute")
        else:
            tl.addinfo("Using schema: %s" % schema_file)
        if result.valid:
            logger.debug("Metadata file '%s' successfully validated." % ead)
        else:
            if schema_file is None:
                tl.adderr("Error validating against schemas using schema files specified by the "
                          "'schemaLocation' attribute:")
            else:
                tl.adderr("Error validating against schema '%s': %s" % (schema_file, result.err))
            for err in result.err:
                tl.adderr("- %s" % str(err))
            return False
        ead_tree = lxml.etree.parse(ead)
        # check dao hrefs
        res = ead_tree.getroot().xpath('//ead:dao', namespaces=ns)
        if len(res) == 0:
            tl.addinfo("The EAD file does not contain any file references.")
        ead_dir, tail = os.path.split(ead)
        references_valid = True
        for dao in res:
            # ead 2002: dao_ref_file = os.path.join(ead_dir, remove_protocol(dao.attrib['{http://www.w3.org/1999/xlink}href']))
            dao_ref_file = os.path.join(ead_dir, remove_protocol(dao.attrib['href']))
            if not os.path.exists(dao_ref_file):
                references_valid = False
                tl.adderr("DAO file reference error - File does not exist: %s" % dao_ref_file, False)
        if not references_valid:
            tl.adderr("DAO file reference errors. Please consult the log file for details.")
            return False
    return True
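# --- Usage sketch (illustrative only) ----------------------------------------
# Mirrors the GML example above, reusing the _StubTaskLogger stand-in sketched
# after validate_gml_data; the root path and file pattern are assumptions.
tl = _StubTaskLogger()
if validate_ead_metadata("/var/data/ip_work/abc123", "*EAD*.xml", None, tl):
    print "EAD metadata and DAO file references are valid."
else:
    for e in tl.errors:
        print "ERROR: %s" % e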